summaryrefslogtreecommitdiffstats
path: root/clang/test/OpenMP/nvptx_target_codegen.cpp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-10-12 16:04:20 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-10-12 16:04:20 +0000
commit9bfe91da3d2c2eb3f0cba9dc587a51ddbbf4d8ac (patch)
tree6a8da57be99cd61b82cbe3160d91fc0fcd578744 /clang/test/OpenMP/nvptx_target_codegen.cpp
parentc046b6856ec91e65b1112e66af0fa6735c20bc7d (diff)
downloadbcm5719-llvm-9bfe91da3d2c2eb3f0cba9dc587a51ddbbf4d8ac.tar.gz
bcm5719-llvm-9bfe91da3d2c2eb3f0cba9dc587a51ddbbf4d8ac.zip
[OPENMP][NVPTX]Reduce memory usage in orphaned functions.
if the function has globalized variables and called in context of target/teams/distribute regions, it does not need to globalize 32 copies of the same variables for memory coalescing, it is enough to have just one copy, because there is parallel region. Patch does this by adding call for `__kmpc_parallel_level` function and checking its return value. If the code sees that the parallel level is 0, then only one variable is allocated, not 32. llvm-svn: 344356
Diffstat (limited to 'clang/test/OpenMP/nvptx_target_codegen.cpp')
-rw-r--r--clang/test/OpenMP/nvptx_target_codegen.cpp12
1 files changed, 9 insertions, 3 deletions
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp
index 69c338995b7..3525d2e2054 100644
--- a/clang/test/OpenMP/nvptx_target_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_codegen.cpp
@@ -557,20 +557,26 @@ int baz(int f, double &a) {
// CHECK: alloca i32,
// CHECK: [[LOCAL_F_PTR:%.+]] = alloca i32,
// CHECK: [[ZERO_ADDR:%.+]] = alloca i32,
- // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
// CHECK: store i32 0, i32* [[ZERO_ADDR]]
+ // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t*
+ // CHECK: [[PAR_LEVEL:%.+]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @0, i32 [[GTID]])
+ // CHECK: [[IS_TTD:%.+]] = icmp eq i16 %1, 0
// CHECK: [[RES:%.+]] = call i8 @__kmpc_is_spmd_exec_mode()
// CHECK: [[IS_SPMD:%.+]] = icmp ne i8 [[RES]], 0
// CHECK: br i1 [[IS_SPMD]], label
// CHECK: br label
- // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 128, i16 0)
+ // CHECK: [[SIZE:%.+]] = select i1 [[IS_TTD]], i{{64|32}} 4, i{{64|32}} 128
+ // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} [[SIZE]], i16 0)
// CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to [[GLOBAL_ST:%.+]]*
// CHECK: br label
// CHECK: [[ITEMS:%.+]] = phi [[GLOBAL_ST]]* [ null, {{.+}} ], [ [[REC_ADDR]], {{.+}} ]
+ // CHECK: [[TTD_ITEMS:%.+]] = bitcast [[GLOBAL_ST]]* [[ITEMS]] to [[SEC_GLOBAL_ST:%.+]]*
// CHECK: [[F_PTR_ARR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[ITEMS]], i32 0, i32 0
// CHECK: [[TID:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
// CHECK: [[LID:%.+]] = and i32 [[TID]], 31
- // CHECK: [[GLOBAL_F_PTR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
+ // CHECK: [[GLOBAL_F_PTR_PAR:%.+]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F_PTR_ARR]], i32 0, i32 [[LID]]
+ // CHECK: [[GLOBAL_F_PTR_TTD:%.+]] = getelementptr inbounds [[SEC_GLOBAL_ST]], [[SEC_GLOBAL_ST]]* [[TTD_ITEMS]], i32 0, i32 0
+ // CHECK: [[GLOBAL_F_PTR:%.+]] = select i1 [[IS_TTD]], i32* [[GLOBAL_F_PTR_TTD]], i32* [[GLOBAL_F_PTR_PAR]]
// CHECK: [[F_PTR:%.+]] = select i1 [[IS_SPMD]], i32* [[LOCAL_F_PTR]], i32* [[GLOBAL_F_PTR]]
// CHECK: store i32 %{{.+}}, i32* [[F_PTR]],
OpenPOWER on IntegriCloud