[OPENMP][NVPTX] Support memory coalescing for globalized variables.

Added support for memory coalescing of globalized variables to improve performance. From now on, all globalized variables are represented as arrays of 32 elements, and each thread accesses its own element using `tid & 31` as the index.

llvm-svn: 344049
author: Alexey Bataev <a.bataev@hotmail.com> 2018-10-09 14:49:00 +0000
committer: Alexey Bataev <a.bataev@hotmail.com> 2018-10-09 14:49:00 +0000
commit: 9ea3c385973c395c4798145f71ff3ae34ccaabeb (patch)
tree: 0a2f511ca7fb094143065d6ee32b2976e7079a80 /clang/test/OpenMP/nvptx_teams_codegen.cpp
parent: f5fac1826a867d5e65310456670cc1d5a495ed02 (diff)
download: bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.tar.gz bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.zip
1 file changed, 20 insertions, 8 deletions
diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp
index 4e3f2674fc5..91b372c65b6 100644
--- a/clang/test/OpenMP/nvptx_teams_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp
@@ -36,10 +36,13 @@ int main (int argc, char **argv) {
 // CK1:  store {{.+}} 0, {{.+}},
 // CK1:  store i{{[0-9]+}} [[ARGC]], i{{[0-9]+}}* [[ARGCADDR]],
 // CK1-64:  [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ARGCADDR]] to i{{[0-9]+}}*
-// CK1:  call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CK1:  call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0)
 // CK1-64:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
 // CK1-32:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
-// CK1:  [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1:  [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CK1:  [[LID:%.+]] = and i32 [[TID]], 31
+// CK1:  [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
 // CK1:  store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
 // CK1:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
 // CK1:  [[ARGCADDR_PTR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[ARGCADDR_PTR]],
@@ -53,9 +56,12 @@ int main (int argc, char **argv) {
 // CK1: [[ARGCADDR_PTR:%.+]] = alloca i{{.+}}***,
 // CK1: [[ARGCADDR:%.+]] = alloca i{{.+}}**,
 // CK1: store i{{.+}}** [[ARGC]], i{{.+}}*** [[ARGCADDR]]
-// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
+// CK1: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0)
 // CK1: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
-// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK1: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CK1: [[LID:%.+]] = and i32 [[TID]], 31
+// CK1: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
 // CK1: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
 // CK1: store i8*** [[ARGCADDR]], i8**** [[ARGCADDR_PTR]],
 // CK1: [[ARGCADDR_PTR_REF:%.+]] = load i{{.+}}**, i{{.+}}*** [[ARGCADDR_PTR]],
@@ -111,10 +117,13 @@ int main (int argc, char **argv) {
 // CK2-64: [[ACONV:%.+]] = bitcast i64* [[AADDR]] to i32*
 // CK2-64: [[BCONV:%.+]] = bitcast i64* [[BADDR]] to i32*
 // CK2-64: [[CONV:%.+]] = bitcast i64* [[ARGCADDR]] to i32*
-// CK2:  call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 4, i16 0)
+// CK2:  call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} 128, i16 0)
 // CK2-64:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[CONV]]
 // CK2-32:  [[ARG:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ARGCADDR]]
-// CK2:  [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2:  [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2:  [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CK2:  [[LID:%.+]] = and i32 [[TID]], 31
+// CK2:  [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
 // CK2:  store i{{[0-9]+}} [[ARG]], i{{[0-9]+}}* [[ARGCADDR]],
 // CK2:  {{%.+}} = call i32 @__kmpc_global_thread_num(
 // CK2:  store i{{[0-9]+}}* [[ARGCADDR]], i{{[0-9]+}}** [[ARGCADDR_PTR]],
@@ -132,9 +141,12 @@ int main (int argc, char **argv) {
 // CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
 // CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
 // CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
-// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{4|8}}, i16 0)
+// CK2: call i8* @__kmpc_data_sharing_push_stack(i{{[0-9]+}} {{128|256}}, i16 0)
 // CK2: [[ARG:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** [[ARGCADDR]]
-// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2: [[ARGCADDR_ARR:%.+]] = getelementptr inbounds %struct.{{.*}}, %struct.{{.*}}* %{{.*}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CK2: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+// CK2: [[LID:%.+]] = and i32 [[TID]], 31
+// CK2: [[ARGCADDR:%.+]] = getelementptr inbounds [32 x i8**], [32 x i8**]* [[ARGCADDR_ARR]], i32 0, i32 [[LID]]
 // CK2: store i{{[0-9]+}}** [[ARG]], i{{[0-9]+}}*** [[ARGCADDR]],
 // CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
 // CK2: store i{{[0-9]+}}*** [[ARGCADDR]], i{{[0-9]+}}**** [[ARGCADDR_PTR]],
author	Alexey Bataev <a.bataev@hotmail.com>	2018-10-09 14:49:00 +0000
committer	Alexey Bataev <a.bataev@hotmail.com>	2018-10-09 14:49:00 +0000
commit	9ea3c385973c395c4798145f71ff3ae34ccaabeb (patch)
tree	0a2f511ca7fb094143065d6ee32b2976e7079a80 /clang/test/OpenMP/nvptx_teams_codegen.cpp
parent	f5fac1826a867d5e65310456670cc1d5a495ed02 (diff)
download	bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.tar.gz bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.zip