diff options
author | Erich Keane <erich.keane@intel.com> | 2018-07-20 14:13:28 +0000 |
---|---|---|
committer | Erich Keane <erich.keane@intel.com> | 2018-07-20 14:13:28 +0000 |
commit | 3efe00206f0f470bf322321efcb915b54f18118c (patch) | |
tree | 1b4a550cffc5378a55e9da580503f35791af0f88 /clang/lib/CodeGen/CodeGenFunction.cpp | |
parent | f907e19b5eafa7349cc848951778a576a4b5f141 (diff) | |
download | bcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.tar.gz bcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.zip |
Implement cpu_dispatch/cpu_specific Multiversioning
As documented here: https://software.intel.com/en-us/node/682969 and
https://software.intel.com/en-us/node/523346. cpu_dispatch multiversioning
is an ICC feature that provides for function multiversioning.
This feature is implemented with two attributes: First, cpu_specific,
which specifies the individual function versions. Second, cpu_dispatch,
which specifies the location of the resolver function and the list of
resolvable functions.
This is valuable since it provides a mechanism where the resolver's TU
can be specified in one location, and the individual implementations
each in their own translation units.
The goal of this patch is to be source-compatible with ICC, so this
implementation diverges from the ICC implementation in a few ways:
1- Linux x86/64 only: This implementation uses ifuncs in order to
properly dispatch functions. This is a valuable performance benefit
over the ICC implementation. A future patch will be provided to enable
this feature on Windows, but it will obviously more closely fit ICC's
implementation.
2- CPU Identification functions: ICC uses a set of custom functions to identify
the feature list of the host processor. This patch uses the cpu_supports
functionality in order to better align with 'target' multiversioning.
3- cpu_dispatch function def/decl: ICC's cpu_dispatch requires that the function
marked cpu_dispatch be an empty definition. This patch supports that as well,
however declarations are also permitted, since the linker will solve the
issue of multiple emissions.
Differential Revision: https://reviews.llvm.org/D47474
llvm-svn: 337552
Diffstat (limited to 'clang/lib/CodeGen/CodeGenFunction.cpp')
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.cpp | 52 |
1 files changed, 46 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 99af1a18e81..3c582688e91 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2323,7 +2323,8 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E, << TargetDecl->getDeclName() << CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID); - } else if (TargetDecl->hasAttr<TargetAttr>()) { + } else if (TargetDecl->hasAttr<TargetAttr>() || + TargetDecl->hasAttr<CPUSpecificAttr>()) { // Get the required features for the callee. const TargetAttr *TD = TargetDecl->getAttr<TargetAttr>(); @@ -2358,8 +2359,8 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) { CGM.getSanStats().create(IRB, SSK); } -llvm::Value * -CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { +llvm::Value *CodeGenFunction::FormResolverCondition( + const TargetMultiVersionResolverOption &RO) { llvm::Value *TrueCondition = nullptr; if (!RO.ParsedAttribute.Architecture.empty()) TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture); @@ -2377,8 +2378,9 @@ CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) { return TrueCondition; } -void CodeGenFunction::EmitMultiVersionResolver( - llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) { +void CodeGenFunction::EmitTargetMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<TargetMultiVersionResolverOption> Options) { assert((getContext().getTargetInfo().getTriple().getArch() == llvm::Triple::x86 || getContext().getTargetInfo().getTriple().getArch() == @@ -2391,7 +2393,7 @@ void CodeGenFunction::EmitMultiVersionResolver( EmitX86CpuInit(); llvm::Function *DefaultFunc = nullptr; - for (const MultiVersionResolverOption &RO : Options) { + for (const TargetMultiVersionResolverOption &RO : Options) { Builder.SetInsertPoint(CurBlock); llvm::Value *TrueCondition = FormResolverCondition(RO); @@ -2412,6 
+2414,44 @@ void CodeGenFunction::EmitMultiVersionResolver( Builder.CreateRet(DefaultFunc); } +void CodeGenFunction::EmitCPUDispatchMultiVersionResolver( + llvm::Function *Resolver, + ArrayRef<CPUDispatchMultiVersionResolverOption> Options) { + assert((getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86 || + getContext().getTargetInfo().getTriple().getArch() == + llvm::Triple::x86_64) && + "Only implemented for x86 targets"); + + // Main function's basic block. + llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver); + Builder.SetInsertPoint(CurBlock); + EmitX86CpuInit(); + + for (const CPUDispatchMultiVersionResolverOption &RO : Options) { + Builder.SetInsertPoint(CurBlock); + + // "generic" case should catch-all. + if (RO.FeatureMask == 0) { + Builder.CreateRet(RO.Function); + return; + } + llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver); + llvm::IRBuilder<> RetBuilder(RetBlock); + RetBuilder.CreateRet(RO.Function); + CurBlock = createBasicBlock("resolver_else", Resolver); + llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask); + Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock); + } + + Builder.SetInsertPoint(CurBlock); + llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap); + TrapCall->setDoesNotReturn(); + TrapCall->setDoesNotThrow(); + Builder.CreateUnreachable(); + Builder.ClearInsertionPoint(); +} + llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) { if (CGDebugInfo *DI = getDebugInfo()) return DI->SourceLocToDebugLoc(Location); |