summaryrefslogtreecommitdiffstats
path: root/clang/test/OpenMP/nvptx_target_codegen.cpp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-10-09 14:49:00 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-10-09 14:49:00 +0000
commit9ea3c385973c395c4798145f71ff3ae34ccaabeb (patch)
tree0a2f511ca7fb094143065d6ee32b2976e7079a80 /clang/test/OpenMP/nvptx_target_codegen.cpp
parentf5fac1826a867d5e65310456670cc1d5a495ed02 (diff)
downloadbcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.tar.gz
bcm5719-llvm-9ea3c385973c395c4798145f71ff3ae34ccaabeb.zip
[OPENMP][NVPTX] Support memory coalescing for globalized variables.
Added support for memory coalescing for better performance for globalized variables. From now on all the globalized variables are represented as arrays of 32 elements and each thread accesses these elements using `tid & 31` as index. llvm-svn: 344049
Diffstat (limited to 'clang/test/OpenMP/nvptx_target_codegen.cpp')
-rw-r--r--clang/test/OpenMP/nvptx_target_codegen.cpp15
1 files changed, 10 insertions, 5 deletions
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 01f346121a4..69c338995b7 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -554,7 +554,8 @@ int baz(int f, double &a) {
// CHECK: ret void
// CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}})
- // CHECK: [[STACK:%.+]] = alloca [[GLOBAL_ST:%.+]],
+ // CHECK: alloca i32,
+ // CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
@@ -562,11 +563,15 @@ int baz(int f, double &a) {
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
// CHECK: br i1 [[IS_SPMD]], label
// CHECK: br label
- // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0)
- // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST]]*
+ // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)
+ // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*
// CHECK: br label
- // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ [[STACK]], {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
- // CHECK: [[F_PTR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
+ // CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
+ // CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
+ // CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+ // CHECK: [[LID:%.+]] = and i32 [[TID]], 31
+ // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
+ // CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], i32* [[GLOBAL_F_PTR]]
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
OpenPOWER on IntegriCloud