summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CodeGenFunction.cpp
diff options
context:
space:
mode:
authorErich Keane <erich.keane@intel.com>2018-07-20 14:13:28 +0000
committerErich Keane <erich.keane@intel.com>2018-07-20 14:13:28 +0000
commit3efe00206f0f470bf322321efcb915b54f18118c (patch)
tree1b4a550cffc5378a55e9da580503f35791af0f88 /clang/lib/CodeGen/CodeGenFunction.cpp
parentf907e19b5eafa7349cc848951778a576a4b5f141 (diff)
downloadbcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.tar.gz
bcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.zip
Implement cpu_dispatch/cpu_specific Multiversioning
As documented here: https://software.intel.com/en-us/node/682969 and https://software.intel.com/en-us/node/523346. cpu_dispatch multiversioning is an ICC feature that provides for function multiversioning. This feature is implemented with two attributes: First, cpu_specific, which specifies the individual function versions. Second, cpu_dispatch, which specifies the location of the resolver function and the list of resolvable functions. This is valuable since it provides a mechanism where the resolver's TU can be specified in one location, and the individual implementions each in their own translation units. The goal of this patch is to be source-compatible with ICC, so this implementation diverges from the ICC implementation in a few ways: 1- Linux x86/64 only: This implementation uses ifuncs in order to properly dispatch functions. This is is a valuable performance benefit over the ICC implementation. A future patch will be provided to enable this feature on Windows, but it will obviously more closely fit ICC's implementation. 2- CPU Identification functions: ICC uses a set of custom functions to identify the feature list of the host processor. This patch uses the cpu_supports functionality in order to better align with 'target' multiversioning. 1- cpu_dispatch function def/decl: ICC's cpu_dispatch requires that the function marked cpu_dispatch be an empty definition. This patch supports that as well, however declarations are also permitted, since the linker will solve the issue of multiple emissions. Differential Revision: https://reviews.llvm.org/D47474 llvm-svn: 337552
Diffstat (limited to 'clang/lib/CodeGen/CodeGenFunction.cpp')
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.cpp52
1 files changed, 46 insertions, 6 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 99af1a18e81..3c582688e91 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2323,7 +2323,8 @@ void CodeGenFunction::checkTargetFeatures(const CallExpr *E,
<< TargetDecl->getDeclName()
<< CGM.getContext().BuiltinInfo.getRequiredFeatures(BuiltinID);
- } else if (TargetDecl->hasAttr<TargetAttr>()) {
+ } else if (TargetDecl->hasAttr<TargetAttr>() ||
+ TargetDecl->hasAttr<CPUSpecificAttr>()) {
// Get the required features for the callee.
const TargetAttr *TD = TargetDecl->getAttr<TargetAttr>();
@@ -2358,8 +2359,8 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
CGM.getSanStats().create(IRB, SSK);
}
-llvm::Value *
-CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
+llvm::Value *CodeGenFunction::FormResolverCondition(
+ const TargetMultiVersionResolverOption &RO) {
llvm::Value *TrueCondition = nullptr;
if (!RO.ParsedAttribute.Architecture.empty())
TrueCondition = EmitX86CpuIs(RO.ParsedAttribute.Architecture);
@@ -2377,8 +2378,9 @@ CodeGenFunction::FormResolverCondition(const MultiVersionResolverOption &RO) {
return TrueCondition;
}
-void CodeGenFunction::EmitMultiVersionResolver(
- llvm::Function *Resolver, ArrayRef<MultiVersionResolverOption> Options) {
+void CodeGenFunction::EmitTargetMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<TargetMultiVersionResolverOption> Options) {
assert((getContext().getTargetInfo().getTriple().getArch() ==
llvm::Triple::x86 ||
getContext().getTargetInfo().getTriple().getArch() ==
@@ -2391,7 +2393,7 @@ void CodeGenFunction::EmitMultiVersionResolver(
EmitX86CpuInit();
llvm::Function *DefaultFunc = nullptr;
- for (const MultiVersionResolverOption &RO : Options) {
+ for (const TargetMultiVersionResolverOption &RO : Options) {
Builder.SetInsertPoint(CurBlock);
llvm::Value *TrueCondition = FormResolverCondition(RO);
@@ -2412,6 +2414,44 @@ void CodeGenFunction::EmitMultiVersionResolver(
Builder.CreateRet(DefaultFunc);
}
+void CodeGenFunction::EmitCPUDispatchMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<CPUDispatchMultiVersionResolverOption> Options) {
+ assert((getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86 ||
+ getContext().getTargetInfo().getTriple().getArch() ==
+ llvm::Triple::x86_64) &&
+ "Only implemented for x86 targets");
+
+ // Main function's basic block.
+ llvm::BasicBlock *CurBlock = createBasicBlock("resolver_entry", Resolver);
+ Builder.SetInsertPoint(CurBlock);
+ EmitX86CpuInit();
+
+ for (const CPUDispatchMultiVersionResolverOption &RO : Options) {
+ Builder.SetInsertPoint(CurBlock);
+
+ // "generic" case should catch-all.
+ if (RO.FeatureMask == 0) {
+ Builder.CreateRet(RO.Function);
+ return;
+ }
+ llvm::BasicBlock *RetBlock = createBasicBlock("resolver_return", Resolver);
+ llvm::IRBuilder<> RetBuilder(RetBlock);
+ RetBuilder.CreateRet(RO.Function);
+ CurBlock = createBasicBlock("resolver_else", Resolver);
+ llvm::Value *TrueCondition = EmitX86CpuSupports(RO.FeatureMask);
+ Builder.CreateCondBr(TrueCondition, RetBlock, CurBlock);
+ }
+
+ Builder.SetInsertPoint(CurBlock);
+ llvm::CallInst *TrapCall = EmitTrapCall(llvm::Intrinsic::trap);
+ TrapCall->setDoesNotReturn();
+ TrapCall->setDoesNotThrow();
+ Builder.CreateUnreachable();
+ Builder.ClearInsertionPoint();
+}
+
llvm::DebugLoc CodeGenFunction::SourceLocToDebugLoc(SourceLocation Location) {
if (CGDebugInfo *DI = getDebugInfo())
return DI->SourceLocToDebugLoc(Location);
OpenPOWER on IntegriCloud