[CUDA] Mark all CUDA device-side function defs, decls, and calls as convergent.

Summary: This is important for e.g. the following case: void sync() { __syncthreads(); } void foo() { do_something(); sync(); do_something_else(); } Without this change, if the optimizer does not inline sync() (which it won't because __syncthreads is also marked as noduplicate, for now anyway), it is free to perform optimizations on sync() that it would not be able to perform on __syncthreads(), because sync() is not marked as convergent. Similarly, we need a notion of convergent calls, since in the case when we can't statically determine a call's target(s), we need to know whether it's safe to perform optimizations around the call. This change is conservative; the optimizer will remove these attrs where it can, see r260318, r260319. Reviewers: majnemer Subscribers: cfe-commits, jhen, echristo, tra Differential Revision: http://reviews.llvm.org/D17056 llvm-svn: 261779
author: Justin Lebar <jlebar@google.com> 2016-02-24 21:55:11 +0000
committer: Justin Lebar <jlebar@google.com> 2016-02-24 21:55:11 +0000
commit: ddd97faeecce050402ae5b068ee070132254f5f9 (patch)
tree: 4f288415b1bfe6498a3d13543ef923fcd389665a /clang/test/CodeGenCUDA
parent: 1ef9b592846ccfad78c8d904428ba2500c169dc5 (diff)
download: bcm5719-llvm-ddd97faeecce050402ae5b068ee070132254f5f9.tar.gz
bcm5719-llvm-ddd97faeecce050402ae5b068ee070132254f5f9.zip
2 files changed, 40 insertions, 1 deletions
diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu
new file mode 100644
index 00000000000..d2e75f7e2fa
--- /dev/null
+++ b/clang/test/CodeGenCUDA/convergent.cu
@@ -0,0 +1,39 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -disable-llvm-passes -o - %s | \
+// RUN:  FileCheck -check-prefix HOST %s
+
+#include "Inputs/cuda.h"
+
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3foov
+__device__ void foo() {}
+
+// HOST: Function Attrs:
+// HOST-NOT: convergent
+// HOST-NEXT: define void @_Z3barv
+// DEVICE: Function Attrs:
+// DEVICE-SAME: convergent
+// DEVICE-NEXT: define void @_Z3barv
+__host__ __device__ void baz();
+__host__ __device__ void bar() {
+  // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
+  baz();
+}
+
+// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// DEVICE: attributes [[BAZ_ATTR]] = {
+// DEVICE-SAME: convergent
+// DEVICE-SAME: }
+// DEVICE: attributes [[CALL_ATTR]] = { convergent }
+
+// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
+// HOST: attributes [[BAZ_ATTR]] = {
+// HOST-NOT: convergent
+// HOST-SAME: }
diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu
index 774c59081b5..864cc6daee8 100644
--- a/clang/test/CodeGenCUDA/device-var-init.cu
+++ b/clang/test/CodeGenCUDA/device-var-init.cu
@@ -382,7 +382,7 @@ __device__ void df() {
 // CHECK:   call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc)
 // CHECK:   call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec)
 // CHECK:   call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1)
-// CHECK:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3
+// CHECK:   call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t)
 // CHECK:   call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec)
 // CHECK:   call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec)
 // CHECK:   call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec)
author	Justin Lebar <jlebar@google.com>	2016-02-24 21:55:11 +0000
committer	Justin Lebar <jlebar@google.com>	2016-02-24 21:55:11 +0000
commit	ddd97faeecce050402ae5b068ee070132254f5f9 (patch)
tree	4f288415b1bfe6498a3d13543ef923fcd389665a /clang/test/CodeGenCUDA
parent	1ef9b592846ccfad78c8d904428ba2500c169dc5 (diff)
download	bcm5719-llvm-ddd97faeecce050402ae5b068ee070132254f5f9.tar.gz bcm5719-llvm-ddd97faeecce050402ae5b068ee070132254f5f9.zip