diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2018-10-09 14:49:00 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-10-09 14:49:00 +0000 |
commit | 9ea3c385973c395c4798145f71ff3ae34ccaabeb (patch) | |
tree | 0a2f511ca7fb094143065d6ee32b2976e7079a80 /clang/test/OpenMP/nvptx_teams_codegen.cpp | |
parent | f5fac1826a867d5e65310456670cc1d5a495ed02 (diff) | |
download | bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.tar.gz bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.zip |
[OPENMP][NVPTX] Support memory coalescing for globalized variables.
Added support for memory coalescing for better performance for
globalized variables. From now on all the globalized variables are
represented as arrays of 32 elements and each thread accesses these
elements using `tid & 31` as index.
llvm-svn: 344049
Diffstat (limited to 'clang/test/OpenMP/nvptx_teams_codegen.cpp')
-rw-r--r-- | clang/test/OpenMP/nvptx_teams_codegen.cpp | 28 |
1 files changed, 20 insertions, 8 deletions
diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index 4e3f2674fc5..91b372c65b6 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -36,10 +36,13 @@ int main (int argc, char **argv) { // CK1: store {{.+}} 0, {{.+}}, // CK1: store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]], // CK1-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}* -// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) +// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0) // CK1-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]] // CK1-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]] -// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK1: [[LID:%.+]] = and i32 [[TID]], 31 +// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK1: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]], // CK1: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]], // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]], @@ -53,9 +56,12 @@ int main (int argc, char **argv) { // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***, // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**, // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]] -// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0) +// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0) // CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] -// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK1: [[LID:%.+]] = and i32 [[TID]], 31 +// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]], // CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]], // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]], @@ -111,10 +117,13 @@ int main (int argc, char **argv) { // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32* // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32* // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32* -// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0) +// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0) // CK2-64: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]] // CK2-32: [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]] -// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK2: [[LID:%.+]] = and i32 [[TID]], 31 +// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK2: store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]], // CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]], @@ -132,9 +141,12 @@ int main (int argc, char **argv) { // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]], // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]], // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]], -// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0) +// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0) // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]] -// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 +// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CK2: [[LID:%.+]] = and i32 [[TID]], 31 +// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]] // CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]], // CK2: {{%.+}} = call i32 @__kmpc_global_thread_num( // CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]], |