summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CodeGenFunction.h
diff options
context:
space:
mode:
authorErich Keane <erich.keane@intel.com>2018-07-20 14:13:28 +0000
committerErich Keane <erich.keane@intel.com>2018-07-20 14:13:28 +0000
commit3efe00206f0f470bf322321efcb915b54f18118c (patch)
tree1b4a550cffc5378a55e9da580503f35791af0f88 /clang/lib/CodeGen/CodeGenFunction.h
parentf907e19b5eafa7349cc848951778a576a4b5f141 (diff)
downloadbcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.tar.gz
bcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.zip
Implement cpu_dispatch/cpu_specific Multiversioning
As documented here: https://software.intel.com/en-us/node/682969 and https://software.intel.com/en-us/node/523346. cpu_dispatch multiversioning is an ICC feature that provides for function multiversioning. This feature is implemented with two attributes: First, cpu_specific, which specifies the individual function versions. Second, cpu_dispatch, which specifies the location of the resolver function and the list of resolvable functions. This is valuable since it provides a mechanism where the resolver's TU can be specified in one location, and the individual implementations each in their own translation units. The goal of this patch is to be source-compatible with ICC, so this implementation diverges from the ICC implementation in a few ways: 1- Linux x86/64 only: This implementation uses ifuncs in order to properly dispatch functions. This is a valuable performance benefit over the ICC implementation. A future patch will be provided to enable this feature on Windows, but it will obviously more closely fit ICC's implementation. 2- CPU Identification functions: ICC uses a set of custom functions to identify the feature list of the host processor. This patch uses the cpu_supports functionality in order to better align with 'target' multiversioning. 3- cpu_dispatch function def/decl: ICC's cpu_dispatch requires that the function marked cpu_dispatch be an empty definition. This patch supports that as well, however declarations are also permitted, since the linker will solve the issue of multiple emissions. Differential Revision: https://reviews.llvm.org/D47474 llvm-svn: 337552
Diffstat (limited to 'clang/lib/CodeGen/CodeGenFunction.h')
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h35
1 files changed, 28 insertions, 7 deletions
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index d6deb655824..1fc445c66df 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4113,12 +4113,13 @@ public:
void EmitSanitizerStatReport(llvm::SanitizerStatKind SSK);
- struct MultiVersionResolverOption {
+ struct TargetMultiVersionResolverOption {
llvm::Function *Function;
TargetAttr::ParsedTargetAttr ParsedAttribute;
unsigned Priority;
- MultiVersionResolverOption(const TargetInfo &TargInfo, llvm::Function *F,
- const clang::TargetAttr::ParsedTargetAttr &PT)
+ TargetMultiVersionResolverOption(
+ const TargetInfo &TargInfo, llvm::Function *F,
+ const clang::TargetAttr::ParsedTargetAttr &PT)
: Function(F), ParsedAttribute(PT), Priority(0u) {
for (StringRef Feat : PT.Features)
Priority = std::max(Priority,
@@ -4129,12 +4130,30 @@ public:
TargInfo.multiVersionSortPriority(PT.Architecture));
}
- bool operator>(const MultiVersionResolverOption &Other) const {
+ bool operator>(const TargetMultiVersionResolverOption &Other) const {
return Priority > Other.Priority;
}
};
- void EmitMultiVersionResolver(llvm::Function *Resolver,
- ArrayRef<MultiVersionResolverOption> Options);
+ void EmitTargetMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<TargetMultiVersionResolverOption> Options);
+
+ struct CPUDispatchMultiVersionResolverOption {
+ llvm::Function *Function;
+ // Note: EmitX86CPUSupports only has 32 bits available, so we store the mask
+ // as 32 bits here. When 64-bit support is added to __builtin_cpu_supports,
+ // this can be extended to 64 bits.
+ uint32_t FeatureMask;
+ CPUDispatchMultiVersionResolverOption(llvm::Function *F, uint64_t Mask)
+ : Function(F), FeatureMask(static_cast<uint32_t>(Mask)) {}
+ bool operator>(const CPUDispatchMultiVersionResolverOption &Other) const {
+ return FeatureMask > Other.FeatureMask;
+ }
+ };
+ void EmitCPUDispatchMultiVersionResolver(
+ llvm::Function *Resolver,
+ ArrayRef<CPUDispatchMultiVersionResolverOption> Options);
+ static uint32_t GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
private:
QualType getVarArgType(const Expr *Arg);
@@ -4151,8 +4170,10 @@ private:
llvm::Value *EmitX86CpuIs(StringRef CPUStr);
llvm::Value *EmitX86CpuSupports(const CallExpr *E);
llvm::Value *EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs);
+ llvm::Value *EmitX86CpuSupports(uint32_t Mask);
llvm::Value *EmitX86CpuInit();
- llvm::Value *FormResolverCondition(const MultiVersionResolverOption &RO);
+ llvm::Value *
+ FormResolverCondition(const TargetMultiVersionResolverOption &RO);
};
/// Helper class with most of the code for saving a value for a
OpenPOWER on IntegriCloud