[NVPTX] Implemented shfl.sync instruction and supporting intrinsics/builtins.

Differential Revision: https://reviews.llvm.org/D38090 llvm-svn: 313820
author: Artem Belevich <tra@google.com> 2017-09-20 21:23:07 +0000
committer: Artem Belevich <tra@google.com> 2017-09-20 21:23:07 +0000
commit: 4654dc89beb0884dd707f402a06c1884f88196ad (patch)
tree: 004a34e5afeaaae1565dccb57f5907aa777b889a /clang/test/CodeGen
parent: 562bf99ee61dbc28901253590780981c45e38e72 (diff)
download: bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.tar.gz
bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.zip
2 files changed, 61 insertions, 0 deletions
diff --git a/clang/test/CodeGen/builtins-nvptx-ptx60.cu b/clang/test/CodeGen/builtins-nvptx-ptx60.cu
new file mode 100644
index 00000000000..e06c84c150c
--- /dev/null
+++ b/clang/test/CodeGen/builtins-nvptx-ptx60.cu
@@ -0,0 +1,40 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_60 \
+// RUN:            -fcuda-is-device -target-feature +ptx60 \
+// RUN:            -S -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CHECK %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
+// RUN:   -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+// CHECK-LABEL: nvvm_shfl_sync
+__device__ void nvvm_shfl_sync(unsigned mask, int i, float f, int a, int b) {
+  // CHECK: call i32 @llvm.nvvm.shfl.sync.down.i32(i32 {{%[0-9]+}}, i32
+  // expected-error@+1 {{'__nvvm_shfl_sync_down_i32' needs target feature ptx60}}
+  __nvvm_shfl_sync_down_i32(mask, i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.sync.down.f32(i32 {{%[0-9]+}}, float
+  // expected-error@+1 {{'__nvvm_shfl_sync_down_f32' needs target feature ptx60}}
+  __nvvm_shfl_sync_down_f32(mask, f, a, b);
+  // CHECK: call i32 @llvm.nvvm.shfl.sync.up.i32(i32 {{%[0-9]+}}, i32
+  // expected-error@+1 {{'__nvvm_shfl_sync_up_i32' needs target feature ptx60}}
+  __nvvm_shfl_sync_up_i32(mask, i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.sync.up.f32(i32 {{%[0-9]+}}, float
+  // expected-error@+1 {{'__nvvm_shfl_sync_up_f32' needs target feature ptx60}}
+  __nvvm_shfl_sync_up_f32(mask, f, a, b);
+  // CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 {{%[0-9]+}}, i32
+  // expected-error@+1 {{'__nvvm_shfl_sync_bfly_i32' needs target feature ptx60}}
+  __nvvm_shfl_sync_bfly_i32(mask, i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 {{%[0-9]+}}, float
+  // expected-error@+1 {{'__nvvm_shfl_sync_bfly_f32' needs target feature ptx60}}
+  __nvvm_shfl_sync_bfly_f32(mask, f, a, b);
+  // CHECK: call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 {{%[0-9]+}}, i32
+  // expected-error@+1 {{'__nvvm_shfl_sync_idx_i32' needs target feature ptx60}}
+  __nvvm_shfl_sync_idx_i32(mask, i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.sync.idx.f32(i32 {{%[0-9]+}}, float
+  // expected-error@+1 {{'__nvvm_shfl_sync_idx_f32' needs target feature ptx60}}
+  __nvvm_shfl_sync_idx_f32(mask, f, a, b);
+  // CHECK: ret void
+}
diff --git a/clang/test/CodeGen/builtins-nvptx.c b/clang/test/CodeGen/builtins-nvptx.c
index b0d646a51fe..c97b549cbe0 100644
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@@ -636,3 +636,24 @@ __device__ void nvvm_ldg(const void *p) {
   typedef double double2 __attribute__((ext_vector_type(2)));
   __nvvm_ldg_d2((const double2 *)p);
 }
+
+// CHECK-LABEL: nvvm_shfl
+__device__ void nvvm_shfl(int i, float f, int a, int b) {
+  // CHECK: call i32 @llvm.nvvm.shfl.down.i32(i32
+  __nvvm_shfl_down_i32(i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.down.f32(float
+  __nvvm_shfl_down_f32(f, a, b);
+  // CHECK: call i32 @llvm.nvvm.shfl.up.i32(i32
+  __nvvm_shfl_up_i32(i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.up.f32(float
+  __nvvm_shfl_up_f32(f, a, b);
+  // CHECK: call i32 @llvm.nvvm.shfl.bfly.i32(i32
+  __nvvm_shfl_bfly_i32(i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.bfly.f32(float
+  __nvvm_shfl_bfly_f32(f, a, b);
+  // CHECK: call i32 @llvm.nvvm.shfl.idx.i32(i32
+  __nvvm_shfl_idx_i32(i, a, b);
+  // CHECK: call float @llvm.nvvm.shfl.idx.f32(float
+  __nvvm_shfl_idx_f32(f, a, b);
+  // CHECK: ret void
+}
author	Artem Belevich <tra@google.com>	2017-09-20 21:23:07 +0000
committer	Artem Belevich <tra@google.com>	2017-09-20 21:23:07 +0000
commit	4654dc89beb0884dd707f402a06c1884f88196ad (patch)
tree	004a34e5afeaaae1565dccb57f5907aa777b889a /clang/test/CodeGen
parent	562bf99ee61dbc28901253590780981c45e38e72 (diff)
download	bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.tar.gz bcm5719-llvm-4654dc89beb0884dd707f402a06c1884f88196ad.zip