diff options
| author | Artem Belevich <tra@google.com> | 2017-09-20 21:23:07 +0000 |
|---|---|---|
| committer | Artem Belevich <tra@google.com> | 2017-09-20 21:23:07 +0000 |
| commit | 4654dc89beb0884dd707f402a06c1884f88196ad (patch) | |
| tree | 004a34e5afeaaae1565dccb57f5907aa777b889a /clang/test/CodeGen | |
| parent | 562bf99ee61dbc28901253590780981c45e38e72 (diff) | |
| download | bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.tar.gz bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.zip | |
[NVPTX] Implemented shfl.sync instruction and supporting intrinsics/builtins.
Differential Revision: https://reviews.llvm.org/D38090
llvm-svn: 313820
Diffstat (limited to 'clang/test/CodeGen')
| -rw-r--r-- | clang/test/CodeGen/builtins-nvptx-ptx60.cu | 40 | ||||
| -rw-r--r-- | clang/test/CodeGen/builtins-nvptx.c | 21 |
2 files changed, 61 insertions, 0 deletions
diff --git a/clang/test/CodeGen/builtins-nvptx-ptx60.cu b/clang/test/CodeGen/builtins-nvptx-ptx60.cu new file mode 100644 index 00000000000..e06c84c150c --- /dev/null +++ b/clang/test/CodeGen/builtins-nvptx-ptx60.cu @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \ +// RUN: -fcuda-is-device -target-feature +ptx60 \ +// RUN: -S -emit-llvm -o - -x cuda %s \ +// RUN: | FileCheck -check-prefix=CHECK %s +// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \ +// RUN: -fcuda-is-device -S -o /dev/null -x cuda -verify %s + +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) + +// CHECK-LABEL: nvvm_shfl_sync +__device__ void nvvm_shfl_sync(unsigned mask, int i, float f, int a, int b) { + // CHECK: call i32 @llvm.nvvm.shfl.sync.down.i32(i32 {{%[0-9]+}}, i32 + // expected-error@+1 {{'__nvvm_shfl_sync_down_i32' needs target feature ptx60}} + __nvvm_shfl_sync_down_i32(mask, i, a, b); + // CHECK: call float @llvm.nvvm.shfl.sync.down.f32(i32 {{%[0-9]+}}, float + // expected-error@+1 {{'__nvvm_shfl_sync_down_f32' needs target feature ptx60}} + __nvvm_shfl_sync_down_f32(mask, f, a, b); + // CHECK: call i32 @llvm.nvvm.shfl.sync.up.i32(i32 {{%[0-9]+}}, i32 + // expected-error@+1 {{'__nvvm_shfl_sync_up_i32' needs target feature ptx60}} + __nvvm_shfl_sync_up_i32(mask, i, a, b); + // CHECK: call float @llvm.nvvm.shfl.sync.up.f32(i32 {{%[0-9]+}}, float + // expected-error@+1 {{'__nvvm_shfl_sync_up_f32' needs target feature ptx60}} + __nvvm_shfl_sync_up_f32(mask, f, a, b); + // CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 {{%[0-9]+}}, i32 + // expected-error@+1 {{'__nvvm_shfl_sync_bfly_i32' needs target feature ptx60}} + __nvvm_shfl_sync_bfly_i32(mask, i, a, b); + // CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 {{%[0-9]+}}, float + // expected-error@+1 {{'__nvvm_shfl_sync_bfly_f32' needs target feature ptx60}} + __nvvm_shfl_sync_bfly_f32(mask, f, a, b); + // CHECK: call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 {{%[0-9]+}}, i32 + // expected-error@+1 {{'__nvvm_shfl_sync_idx_i32' needs target feature ptx60}} + __nvvm_shfl_sync_idx_i32(mask, i, a, b); + // CHECK: call float @llvm.nvvm.shfl.sync.idx.f32(i32 {{%[0-9]+}}, float + // expected-error@+1 {{'__nvvm_shfl_sync_idx_f32' needs target feature ptx60}} + __nvvm_shfl_sync_idx_f32(mask, f, a, b); + // CHECK: ret void +} diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c index b0d646a51fe..c97b549cbe0 100644 --- a/clang/test/CodeGen/builtins-nvptx.c +++ b/clang/test/CodeGen/builtins-nvptx.c @@ -636,3 +636,24 @@ __device__ void nvvm_ldg(const void *p) { typedef double double2 __attribute__((ext_vector_type(2))); __nvvm_ldg_d2((const double2 *)p); } + +// CHECK-LABEL: nvvm_shfl +__device__ void nvvm_shfl(int i, float f, int a, int b) { + // CHECK: call i32 @llvm.nvvm.shfl.down.i32(i32 + __nvvm_shfl_down_i32(i, a, b); + // CHECK: call float @llvm.nvvm.shfl.down.f32(float + __nvvm_shfl_down_f32(f, a, b); + // CHECK: call i32 @llvm.nvvm.shfl.up.i32(i32 + __nvvm_shfl_up_i32(i, a, b); + // CHECK: call float @llvm.nvvm.shfl.up.f32(float + __nvvm_shfl_up_f32(f, a, b); + // CHECK: call i32 @llvm.nvvm.shfl.bfly.i32(i32 + __nvvm_shfl_bfly_i32(i, a, b); + // CHECK: call float @llvm.nvvm.shfl.bfly.f32(float + __nvvm_shfl_bfly_f32(f, a, b); + // CHECK: call i32 @llvm.nvvm.shfl.idx.i32(i32 + __nvvm_shfl_idx_i32(i, a, b); + // CHECK: call float @llvm.nvvm.shfl.idx.f32(float + __nvvm_shfl_idx_f32(f, a, b); + // CHECK: ret void +} |

