diff options
author | Chandler Carruth <chandlerc@gmail.com> | 2014-10-19 19:13:49 +0000 |
---|---|---|
committer | Chandler Carruth <chandlerc@gmail.com> | 2014-10-19 19:13:49 +0000 |
commit | 0c4b230b32ba04494deeabc4415f7248aa455068 (patch) | |
tree | f9eec5ca4edd62c66504ac86ac32a12cebe40212 /clang/lib/CodeGen/CGExprComplex.cpp | |
parent | 25d2496cd2f98ac0b30eee94f0e0c72655af6b55 (diff) | |
download | bcm5719-llvm-0c4b230b32ba04494deeabc4415f7248aa455068.tar.gz bcm5719-llvm-0c4b230b32ba04494deeabc4415f7248aa455068.zip |
[complex] Teach the complex math IR gen to emit direct math and
a NaN-test prior to the call to the library function.
This should automatically make fastmath (including just non-NaNs) able to avoid
the expensive libcalls and also open the door to more advanced folding in LLVM
based on the rules for complex math.
Two important notes to remember: first is that this isn't yet a proper
limited range mode, it's still just improving the unlimited range mode.
Also, it isn't really perfecet w.r.t. what an unlimited range mode
should be doing because it isn't quite handling the flags produced by
all the operations in the way desirable for that mode, but then neither
is compiler-rt's libcall. When the compiler-rt libcall is improved to
carefully manage flags, the code emitted here should be improved
correspondingly. And it is still a long-term desirable thing to add
a limited range mode to Clang that would be able to use direct math
without library calls here.
Special thanks to Steve Canon for the careful review on this patch and
teaching me about these issues. =D
Differential Revision: http://reviews.llvm.org/D5756
llvm-svn: 220167
Diffstat (limited to 'clang/lib/CodeGen/CGExprComplex.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGExprComplex.cpp | 100 |
1 files changed, 83 insertions, 17 deletions
diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index b1cc1efa58e..e957256ab5d 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -15,9 +15,13 @@ #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/StmtVisitor.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include <algorithm> using namespace clang; using namespace CodeGen; @@ -587,11 +591,31 @@ ComplexPairTy ComplexExprEmitter::EmitComplexBinOpLibCall(StringRef LibCallName, return CGF.EmitCall(FuncInfo, Func, ReturnValueSlot(), Args).getComplexVal(); } +/// \brief Lookup the libcall name for a given floating point type complex +/// multiply. +static StringRef getComplexMultiplyLibCallName(llvm::Type *Ty) { + switch (Ty->getTypeID()) { + default: + llvm_unreachable("Unsupported floating point type!"); + case llvm::Type::HalfTyID: + return "__mulhc3"; + case llvm::Type::FloatTyID: + return "__mulsc3"; + case llvm::Type::DoubleTyID: + return "__muldc3"; + case llvm::Type::PPC_FP128TyID: + return "__multc3"; + case llvm::Type::X86_FP80TyID: + return "__mulxc3"; + } +} + // See C11 Annex G.5.1 for the semantics of multiplicative operators on complex // typed values. ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { using llvm::Value; Value *ResR, *ResI; + llvm::MDBuilder MDHelper(CGF.getLLVMContext()); if (Op.LHS.first->getType()->isFloatingPointTy()) { // The general formulation is: @@ -603,23 +627,65 @@ ComplexPairTy ComplexExprEmitter::EmitBinMul(const BinOpInfo &Op) { // still more of this within the type system. if (Op.LHS.second && Op.RHS.second) { - // If both operands are complex, delegate to a libcall which works to - // prevent underflow and overflow. - StringRef LibCallName; - switch (Op.LHS.first->getType()->getTypeID()) { - default: - llvm_unreachable("Unsupported floating point type!"); - case llvm::Type::HalfTyID: - return EmitComplexBinOpLibCall("__mulhc3", Op); - case llvm::Type::FloatTyID: - return EmitComplexBinOpLibCall("__mulsc3", Op); - case llvm::Type::DoubleTyID: - return EmitComplexBinOpLibCall("__muldc3", Op); - case llvm::Type::PPC_FP128TyID: - return EmitComplexBinOpLibCall("__multc3", Op); - case llvm::Type::X86_FP80TyID: - return EmitComplexBinOpLibCall("__mulxc3", Op); - } + // If both operands are complex, emit the core math directly, and then + // test for NaNs. If we find NaNs in the result, we delegate to a libcall + // to carefully re-compute the correct infinity representation if + // possible. The expectation is that the presence of NaNs here is + // *extremely* rare, and so the cost of the libcall is almost irrelevant. + // This is good, because the libcall re-computes the core multiplication + // exactly the same as we do here and re-tests for NaNs in order to be + // a generic complex*complex libcall. + + // First compute the four products. + Value *AC = Builder.CreateFMul(Op.LHS.first, Op.RHS.first, "mul_ac"); + Value *BD = Builder.CreateFMul(Op.LHS.second, Op.RHS.second, "mul_bd"); + Value *AD = Builder.CreateFMul(Op.LHS.first, Op.RHS.second, "mul_ad"); + Value *BC = Builder.CreateFMul(Op.LHS.second, Op.RHS.first, "mul_bc"); + + // The real part is the difference of the first two, the imaginary part is + // the sum of the second. + ResR = Builder.CreateFSub(AC, BD, "mul_r"); + ResI = Builder.CreateFAdd(AD, BC, "mul_i"); + + // Emit the test for the real part becoming NaN and create a branch to + // handle it. We test for NaN by comparing the number to itself. + Value *IsRNaN = Builder.CreateFCmpUNO(ResR, ResR, "isnan_cmp"); + llvm::BasicBlock *ContBB = CGF.createBasicBlock("complex_mul_cont"); + llvm::BasicBlock *INaNBB = CGF.createBasicBlock("complex_mul_imag_nan"); + llvm::Instruction *Branch = Builder.CreateCondBr(IsRNaN, INaNBB, ContBB); + llvm::BasicBlock *OrigBB = Branch->getParent(); + + // Give hint that we very much don't expect to see NaNs. + // Value chosen to match UR_NONTAKEN_WEIGHT, see BranchProbabilityInfo.cpp + llvm::MDNode *BrWeight = MDHelper.createBranchWeights(1, (1U << 20) - 1); + Branch->setMetadata(llvm::LLVMContext::MD_prof, BrWeight); + + // Now test the imaginary part and create its branch. + CGF.EmitBlock(INaNBB); + Value *IsINaN = Builder.CreateFCmpUNO(ResI, ResI, "isnan_cmp"); + llvm::BasicBlock *LibCallBB = CGF.createBasicBlock("complex_mul_libcall"); + Branch = Builder.CreateCondBr(IsINaN, LibCallBB, ContBB); + Branch->setMetadata(llvm::LLVMContext::MD_prof, BrWeight); + + // Now emit the libcall on this slowest of the slow paths. + CGF.EmitBlock(LibCallBB); + Value *LibCallR, *LibCallI; + std::tie(LibCallR, LibCallI) = EmitComplexBinOpLibCall( + getComplexMultiplyLibCallName(Op.LHS.first->getType()), Op); + Builder.CreateBr(ContBB); + + // Finally continue execution by phi-ing together the different + // computation paths. + CGF.EmitBlock(ContBB); + llvm::PHINode *RealPHI = Builder.CreatePHI(ResR->getType(), 3, "real_mul_phi"); + RealPHI->addIncoming(ResR, OrigBB); + RealPHI->addIncoming(ResR, INaNBB); + RealPHI->addIncoming(LibCallR, LibCallBB); + llvm::PHINode *ImagPHI = Builder.CreatePHI(ResI->getType(), 3, "imag_mul_phi"); + ImagPHI->addIncoming(ResI, OrigBB); + ImagPHI->addIncoming(ResI, INaNBB); + ImagPHI->addIncoming(LibCallI, LibCallBB); + return ComplexPairTy(RealPHI, ImagPHI); } assert((Op.LHS.second || Op.RHS.second) && "At least one operand must be complex!"); |