diff options
author | Justin Lebar <jlebar@google.com> | 2016-02-24 21:55:11 +0000 |
---|---|---|
committer | Justin Lebar <jlebar@google.com> | 2016-02-24 21:55:11 +0000 |
commit | ddd97faeecce050402ae5b068ee070132254f5f9 (patch) | |
tree | 4f288415b1bfe6498a3d13543ef923fcd389665a /clang/test/CodeGenCUDA | |
parent | 1ef9b592846ccfad78c8d904428ba2500c169dc5 (diff) | |
download | bcm5719-llvm-ddd97faeecce050402ae5b068ee070132254f5f9.tar.gz bcm5719-llvm-ddd97faeecce050402ae5b068ee070132254f5f9.zip |
[CUDA] Mark all CUDA device-side function defs, decls, and calls as convergent.
Summary:
This is important for e.g. the following case:
void sync() { __syncthreads(); }
void foo() {
do_something();
sync();
do_something_else():
}
Without this change, if the optimizer does not inline sync() (which it
won't because __syncthreads is also marked as noduplicate, for now
anyway), it is free to perform optimizations on sync() that it would not
be able to perform on __syncthreads(), because sync() is not marked as
convergent.
Similarly, we need a notion of convergent calls, since in the case when
we can't statically determine a call's target(s), we need to know
whether it's safe to perform optimizations around the call.
This change is conservative; the optimizer will remove these attrs where
it can, see r260318, r260319.
Reviewers: majnemer
Subscribers: cfe-commits, jhen, echristo, tra
Differential Revision: http://reviews.llvm.org/D17056
llvm-svn: 261779
Diffstat (limited to 'clang/test/CodeGenCUDA')
-rw-r--r-- | clang/test/CodeGenCUDA/convergent.cu | 39 | ||||
-rw-r--r-- | clang/test/CodeGenCUDA/device-var-init.cu | 2 |
2 files changed, 40 insertions, 1 deletions
diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu new file mode 100644 index 00000000000..d2e75f7e2fa --- /dev/null +++ b/clang/test/CodeGenCUDA/convergent.cu @@ -0,0 +1,39 @@ +// REQUIRES: x86-registered-target +// REQUIRES: nvptx-registered-target + +// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | FileCheck -check-prefix DEVICE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +// RUN: -disable-llvm-passes -o - %s | \ +// RUN: FileCheck -check-prefix HOST %s + +#include "Inputs/cuda.h" + +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3foov +__device__ void foo() {} + +// HOST: Function Attrs: +// HOST-NOT: convergent +// HOST-NEXT: define void @_Z3barv +// DEVICE: Function Attrs: +// DEVICE-SAME: convergent +// DEVICE-NEXT: define void @_Z3barv +__host__ __device__ void baz(); +__host__ __device__ void bar() { + // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]] + baz(); +} + +// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// DEVICE: attributes [[BAZ_ATTR]] = { +// DEVICE-SAME: convergent +// DEVICE-SAME: } +// DEVICE: attributes [[CALL_ATTR]] = { convergent } + +// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]] +// HOST: attributes [[BAZ_ATTR]] = { +// HOST-NOT: convergent +// NOST-SAME: } diff --git a/clang/test/CodeGenCUDA/device-var-init.cu b/clang/test/CodeGenCUDA/device-var-init.cu index 774c59081b5..864cc6daee8 100644 --- a/clang/test/CodeGenCUDA/device-var-init.cu +++ b/clang/test/CodeGenCUDA/device-var-init.cu @@ -382,7 +382,7 @@ __device__ void df() { // CHECK: call void @_ZN4NETCC1IJEEEDpT_(%struct.NETC* %netc) // CHECK: call void @_ZN7EC_I_ECC1Ev(%struct.EC_I_EC* %ec_i_ec) // CHECK: call void @_ZN8EC_I_EC1C1Ev(%struct.EC_I_EC1* %ec_i_ec1) -// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) #3 +// CHECK: call void @_ZN5T_V_TC1Ev(%struct.T_V_T* %t_v_t) // CHECK: call void @_ZN7T_B_NECC1Ev(%struct.T_B_NEC* %t_b_nec) // CHECK: call void @_ZN7T_F_NECC1Ev(%struct.T_F_NEC* %t_f_nec) // CHECK: call void @_ZN8T_FA_NECC1Ev(%struct.T_FA_NEC* %t_fa_nec) |