summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGen
diff options
context:
space:
mode:
authorArtem Belevich <tra@google.com>2017-09-20 21:23:07 +0000
committerArtem Belevich <tra@google.com>2017-09-20 21:23:07 +0000
commit4654dc89beb0884dd707f402a06c1884f88196ad (patch)
tree004a34e5afeaaae1565dccb57f5907aa777b889a /clang/test/CodeGen
parent562bf99ee61dbc28901253590780981c45e38e72 (diff)
downloadbcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.tar.gz
bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.zip
[NVPTX] Implemented shfl.sync instruction and supporting intrinsics/builtins.
Differential Revision: https://reviews.llvm.org/D38090 llvm-svn: 313820
Diffstat (limited to 'clang/test/CodeGen')
-rw-r--r--clang/test/CodeGen/builtins-nvptx-ptx60.cu40
-rw-r--r--clang/test/CodeGen/builtins-nvptx.c21
2 files changed, 61 insertions, 0 deletions
diff --git a/clang/test/CodeGen/builtins-nvptx-ptx60.cu b/clang/test/CodeGen/builtins-nvptx-ptx60.cu
new file mode 100644
index 00000000000..e06c84c150c
--- /dev/null
+++ b/clang/test/CodeGen/builtins-nvptx-ptx60.cu
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN: -fcuda-is-device -target-feature +ptx60 \
+// RUN: -S -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefix=CHECK %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
+// RUN: -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// CHECK-LABEL: nvvm_shfl_sync
+__device__ void nvvm_shfl_sync(unsigned mask, int i, float f, int a, int b) {
+ // CHECK: call i32 @llvm.nvvm.shfl.sync.down.i32(i32 {{%[0-9]+}}, i32
+ // expected-error@+1 {{'__nvvm_shfl_sync_down_i32' needs target feature ptx60}}
+ __nvvm_shfl_sync_down_i32(mask, i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.sync.down.f32(i32 {{%[0-9]+}}, float
+ // expected-error@+1 {{'__nvvm_shfl_sync_down_f32' needs target feature ptx60}}
+ __nvvm_shfl_sync_down_f32(mask, f, a, b);
+ // CHECK: call i32 @llvm.nvvm.shfl.sync.up.i32(i32 {{%[0-9]+}}, i32
+ // expected-error@+1 {{'__nvvm_shfl_sync_up_i32' needs target feature ptx60}}
+ __nvvm_shfl_sync_up_i32(mask, i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.sync.up.f32(i32 {{%[0-9]+}}, float
+ // expected-error@+1 {{'__nvvm_shfl_sync_up_f32' needs target feature ptx60}}
+ __nvvm_shfl_sync_up_f32(mask, f, a, b);
+ // CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 {{%[0-9]+}}, i32
+ // expected-error@+1 {{'__nvvm_shfl_sync_bfly_i32' needs target feature ptx60}}
+ __nvvm_shfl_sync_bfly_i32(mask, i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 {{%[0-9]+}}, float
+ // expected-error@+1 {{'__nvvm_shfl_sync_bfly_f32' needs target feature ptx60}}
+ __nvvm_shfl_sync_bfly_f32(mask, f, a, b);
+ // CHECK: call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 {{%[0-9]+}}, i32
+ // expected-error@+1 {{'__nvvm_shfl_sync_idx_i32' needs target feature ptx60}}
+ __nvvm_shfl_sync_idx_i32(mask, i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.sync.idx.f32(i32 {{%[0-9]+}}, float
+ // expected-error@+1 {{'__nvvm_shfl_sync_idx_f32' needs target feature ptx60}}
+ __nvvm_shfl_sync_idx_f32(mask, f, a, b);
+ // CHECK: ret void
+}
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index b0d646a51fe..c97b549cbe0 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -636,3 +636,24 @@ __device__ void nvvm_ldg(const void *p) {
typedef double double2 __attribute__((ext_vector_type(2)));
__nvvm_ldg_d2((const double2 *)p);
}
+
+// CHECK-LABEL: nvvm_shfl
+__device__ void nvvm_shfl(int i, float f, int a, int b) {
+ // CHECK: call i32 @llvm.nvvm.shfl.down.i32(i32
+ __nvvm_shfl_down_i32(i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.down.f32(float
+ __nvvm_shfl_down_f32(f, a, b);
+ // CHECK: call i32 @llvm.nvvm.shfl.up.i32(i32
+ __nvvm_shfl_up_i32(i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.up.f32(float
+ __nvvm_shfl_up_f32(f, a, b);
+ // CHECK: call i32 @llvm.nvvm.shfl.bfly.i32(i32
+ __nvvm_shfl_bfly_i32(i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.bfly.f32(float
+ __nvvm_shfl_bfly_f32(f, a, b);
+ // CHECK: call i32 @llvm.nvvm.shfl.idx.i32(i32
+ __nvvm_shfl_idx_i32(i, a, b);
+ // CHECK: call float @llvm.nvvm.shfl.idx.f32(float
+ __nvvm_shfl_idx_f32(f, a, b);
+ // CHECK: ret void
+}
OpenPOWER on IntegriCloud