diff options
author | Erich Keane <erich.keane@intel.com> | 2018-07-20 14:13:28 +0000 |
---|---|---|
committer | Erich Keane <erich.keane@intel.com> | 2018-07-20 14:13:28 +0000 |
commit | 3efe00206f0f470bf322321efcb915b54f18118c (patch) | |
tree | 1b4a550cffc5378a55e9da580503f35791af0f88 /clang/test/CodeGen/attr-cpuspecific.c | |
parent | f907e19b5eafa7349cc848951778a576a4b5f141 (diff) | |
download | bcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.tar.gz bcm5719-llvm-3efe00206f0f470bf322321efcb915b54f18118c.zip |
Implement cpu_dispatch/cpu_specific Multiversioning
As documented here: https://software.intel.com/en-us/node/682969 and
https://software.intel.com/en-us/node/523346. cpu_dispatch multiversioning
is an ICC feature that provides for function multiversioning.
This feature is implemented with two attributes: First, cpu_specific,
which specifies the individual function versions. Second, cpu_dispatch,
which specifies the location of the resolver function and the list of
resolvable functions.
This is valuable since it provides a mechanism where the resolver's TU
can be specified in one location, and the individual implementations
each in their own translation units.
The goal of this patch is to be source-compatible with ICC, so this
implementation diverges from the ICC implementation in a few ways:
1- Linux x86/64 only: This implementation uses ifuncs in order to
properly dispatch functions. This is a valuable performance benefit
over the ICC implementation. A future patch will be provided to enable
this feature on Windows, but it will obviously more closely fit ICC's
implementation.
2- CPU Identification functions: ICC uses a set of custom functions to identify
the feature list of the host processor. This patch uses the cpu_supports
functionality in order to better align with 'target' multiversioning.
3- cpu_dispatch function def/decl: ICC's cpu_dispatch requires that the function
marked cpu_dispatch be an empty definition. This patch supports that as well,
however declarations are also permitted, since the linker will solve the
issue of multiple emissions.
Differential Revision: https://reviews.llvm.org/D47474
llvm-svn: 337552
Diffstat (limited to 'clang/test/CodeGen/attr-cpuspecific.c')
-rw-r--r-- | clang/test/CodeGen/attr-cpuspecific.c | 101 |
1 files changed, 101 insertions, 0 deletions
diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c new file mode 100644 index 00000000000..1b98b5dc967 --- /dev/null +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -0,0 +1,101 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s + + +// Each called version should have an IFunc. +// CHECK: @SingleVersion.ifunc = ifunc void (), void ()* ()* @SingleVersion.resolver +// CHECK: @TwoVersions.ifunc = ifunc void (), void ()* ()* @TwoVersions.resolver +// CHECK: @TwoVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @TwoVersionsSameAttr.resolver +// CHECK: @ThreeVersionsSameAttr.ifunc = ifunc void (), void ()* ()* @ThreeVersionsSameAttr.resolver + +__attribute__((cpu_specific(ivybridge))) +void SingleVersion(void){} +// CHECK: define void @SingleVersion.S() #[[S:[0-9]+]] + +__attribute__((cpu_specific(ivybridge))) +void NotCalled(void){} +// CHECK: define void @NotCalled.S() #[[S]] + +// Done before any of the implementations. 
+__attribute__((cpu_dispatch(ivybridge, knl))) +void TwoVersions(void); +// CHECK: define void ()* @TwoVersions.resolver() +// CHECK: call void @__cpu_indicator_init +// CHECK: ret void ()* @TwoVersions.Z +// CHECK: ret void ()* @TwoVersions.S +// CHECK: call void @llvm.trap +// CHECK: unreachable + +__attribute__((cpu_specific(ivybridge))) +void TwoVersions(void){} +// CHECK: define void @TwoVersions.S() #[[S]] + +__attribute__((cpu_specific(knl))) +void TwoVersions(void){} +// CHECK: define void @TwoVersions.Z() #[[K:[0-9]+]] + +__attribute__((cpu_specific(ivybridge, knl))) +void TwoVersionsSameAttr(void){} +// CHECK: define void @TwoVersionsSameAttr.S() #[[S]] +// CHECK: define void @TwoVersionsSameAttr.Z() #[[K]] + +__attribute__((cpu_specific(atom, ivybridge, knl))) +void ThreeVersionsSameAttr(void){} +// CHECK: define void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]] +// CHECK: define void @ThreeVersionsSameAttr.S() #[[S]] +// CHECK: define void @ThreeVersionsSameAttr.Z() #[[K]] + +void usages() { + SingleVersion(); + // CHECK: @SingleVersion.ifunc() + TwoVersions(); + // CHECK: @TwoVersions.ifunc() + TwoVersionsSameAttr(); + // CHECK: @TwoVersionsSameAttr.ifunc() + ThreeVersionsSameAttr(); + // CHECK: @ThreeVersionsSameAttr.ifunc() +} + +// has an extra config to emit! 
+__attribute__((cpu_dispatch(ivybridge, knl, atom))) +void TwoVersionsSameAttr(void); +// CHECK: define void ()* @TwoVersionsSameAttr.resolver() +// CHECK: ret void ()* @TwoVersionsSameAttr.Z +// CHECK: ret void ()* @TwoVersionsSameAttr.S +// CHECK: ret void ()* @TwoVersionsSameAttr.O +// CHECK: call void @llvm.trap +// CHECK: unreachable + +__attribute__((cpu_dispatch(atom, ivybridge, knl))) +void ThreeVersionsSameAttr(void){} +// CHECK: define void ()* @ThreeVersionsSameAttr.resolver() +// CHECK: call void @__cpu_indicator_init +// CHECK: ret void ()* @ThreeVersionsSameAttr.Z +// CHECK: ret void ()* @ThreeVersionsSameAttr.S +// CHECK: ret void ()* @ThreeVersionsSameAttr.O +// CHECK: call void @llvm.trap +// CHECK: unreachable + +// No Cpu Specific options. +__attribute__((cpu_dispatch(atom, ivybridge, knl))) +void NoSpecifics(void); +// CHECK: define void ()* @NoSpecifics.resolver() +// CHECK: call void @__cpu_indicator_init +// CHECK: ret void ()* @NoSpecifics.Z +// CHECK: ret void ()* @NoSpecifics.S +// CHECK: ret void ()* @NoSpecifics.O +// CHECK: call void @llvm.trap +// CHECK: unreachable + +__attribute__((cpu_dispatch(atom, generic, ivybridge, knl))) +void HasGeneric(void); +// CHECK: define void ()* @HasGeneric.resolver() +// CHECK: call void @__cpu_indicator_init +// CHECK: ret void ()* @HasGeneric.Z +// CHECK: ret void ()* @HasGeneric.S +// CHECK: ret void ()* @HasGeneric.O +// CHECK: ret void ()* @HasGeneric.A +// CHECK-NOT: call void @llvm.trap + +// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" |