diff options
| author | Alexey Bataev <a.bataev@hotmail.com> | 2018-05-15 18:01:01 +0000 |
|---|---|---|
| committer | Alexey Bataev <a.bataev@hotmail.com> | 2018-05-15 18:01:01 +0000 |
| commit | 2a3320a928bd5d0731f5a45254c838f7773c6a65 (patch) | |
| tree | e7d75daf79fef73a0271be3449d73de2a3743d02 /clang/test | |
| parent | e182b28ae4a5d467ed990fe50bd215795877f6fa (diff) | |
| download | bcm5719-llvm-2a3320a928bd5d0731f5a45254c838f7773c6a65.tar.gz bcm5719-llvm-2a3320a928bd5d0731f5a45254c838f7773c6a65.zip | |
[OPENMP, NVPTX] Do not globalize variables with reference/pointer types.
In generic data-sharing mode we do not need to globalize
variables/parameters of reference/pointer types, since they are already
placed in global memory.
llvm-svn: 332380
Diffstat (limited to 'clang/test')
5 files changed, 23 insertions, 42 deletions
diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 718c650bec6..0a8e56106ed 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -18,7 +18,7 @@ __thread int id; -int baz(int f); +int baz(int f, double &a); template<typename tx, typename ty> struct TT{ @@ -345,7 +345,7 @@ struct S1 { { this->a = (double)b + 1.5; c[1][1] = ++a; - baz(a); + baz(a, a); } return c[1][1] + (int)b; @@ -367,9 +367,9 @@ int bar(int n){ return a; } -int baz(int f) { +int baz(int f, double &a) { #pragma omp parallel - f = 2; + f = 2 + a; return f; } @@ -551,7 +551,7 @@ int baz(int f) { // CHECK: [[EXIT]] // CHECK: ret void - // CHECK: define i32 [[BAZ]](i32 [[F:%.*]]) + // CHECK: define i32 [[BAZ]](i32 [[F:%.*]], double* dereferenceable{{.*}}) // CHECK: [[ZERO_ADDR:%.+]] = alloca i32, // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* // CHECK: [[GTID_ADDR:%.+]] = alloca i32, @@ -559,13 +559,13 @@ int baz(int f) { // CHECK: [[PTR:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i{{64|32}} 4, i16 0) // CHECK: [[REC_ADDR:%.+]] = bitcast i8* [[PTR]] to %struct._globalized_locals_ty* // CHECK: [[F_PTR:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[REC_ADDR]], i32 0, i32 0 - // CHECK: store i32 [[F]], i32* [[F_PTR]], + // CHECK: store i32 %{{.+}}, i32* [[F_PTR]], // CHECK: store i32 [[GTID]], i32* [[GTID_ADDR]], // CHECK: icmp eq i32 // CHECK: br i1 // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) - // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 1) + // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2) // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]], // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0 // CHECK: [[F_REF:%.+]] = bitcast i32* 
[[F_PTR]] to i8* @@ -580,11 +580,11 @@ int baz(int f) { // CHECK: br i1 // CHECK: call void @__kmpc_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]]) - // CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]]) + // CHECK: call void [[OUTLINED:@.+]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}}) // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @{{.+}}, i32 [[GTID]]) // CHECK: br label - // CHECK: call void @__omp_outlined__(i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]]) + // CHECK: call void [[OUTLINED]](i32* [[GTID_ADDR]], i32* [[ZERO_ADDR]], i32* [[F_PTR]], double* %{{.+}}) // CHECK: br label // CHECK: [[RES:%.+]] = load i32, i32* [[F_PTR]], diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp index 64d195c43a2..d193174ce7d 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -55,6 +55,7 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l26}}( // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align + // CHECK-NOT: call i8* @__kmpc_data_sharing_push_stack // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index 73d3bf82134..13a7fb289da 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -8,9 +8,9 @@ #ifndef HEADER #define HEADER -// Check that the execution mode of all 2 target regions on the gpu is set to SPMD Mode. 
-// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 0 -// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 0 +// Check that the execution mode of all 2 target regions on the gpu is set to non-SPMD Mode. +// CHECK-DAG: {{@__omp_offloading_.+l21}}_exec_mode = weak constant i8 1 +// CHECK-DAG: {{@__omp_offloading_.+l26}}_exec_mode = weak constant i8 1 template<typename tx> tx ftemplate(int n) { @@ -46,23 +46,13 @@ int bar(int n){ // CHECK: store i16* {{%.+}}, i16** [[AA_ADDR]], align // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], - // CHECK: br label {{%?}}[[EXEC:.+]] - // - // CHECK: [[EXEC]] - // CHECK-NOT: call void @__kmpc_push_num_threads - // CHECK: {{call|invoke}} void [[OP1:@.+]]({{.+}}, {{.+}}, i16* [[AA]]) - // CHECK: br label {{%?}}[[DONE:.+]] - // - // CHECK: [[DONE]] - // CHECK: call void @__kmpc_spmd_kernel_deinit() - // CHECK: br label {{%?}}[[EXIT:.+]] - // - // CHECK: [[EXIT]] + // CHECK: call void @__kmpc_kernel_init(i32 + // CHECK: call void @__kmpc_push_num_threads + // CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK: } - // CHECK: define internal void [[OP1]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i16* {{[^%]*}}[[ARG:%.+]]) + // CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i16* {{[^%]*}}[[ARG:%.+]]) // CHECK: = alloca i32*, align // CHECK: = alloca i32*, align // CHECK: [[AA_ADDR:%.+]] = alloca i16*, align @@ -89,23 +79,13 @@ int bar(int n){ // CHECK: [[AA:%.+]] = load i16*, i16** [[AA_ADDR]], align // CHECK: [[B:%.+]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align // CHECK: [[THREAD_LIMIT:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - // CHECK: call void @__kmpc_spmd_kernel_init(i32 [[THREAD_LIMIT]], - // CHECK: br label {{%?}}[[EXEC:.+]] - // - // CHECK: [[EXEC]] - // 
CHECK-NOT: call void @__kmpc_push_num_threads - // CHECK: {{call|invoke}} void [[OP2:@.+]]({{.+}}, {{.+}}, i32* [[A]], i16* [[AA]], [10 x i32]* [[B]]) - // CHECK: br label {{%?}}[[DONE:.+]] - // - // CHECK: [[DONE]] - // CHECK: call void @__kmpc_spmd_kernel_deinit() - // CHECK: br label {{%?}}[[EXIT:.+]] - // - // CHECK: [[EXIT]] + // CHECK: call void @__kmpc_kernel_init(i32 + // CHECK: call void @__kmpc_push_num_threads + // CHECK: call void @__kmpc_kernel_deinit(i16 1) // CHECK: ret void // CHECK: } - // CHECK: define internal void [[OP2]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]]) + // CHECK: define internal void @{{.+}}(i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* {{[^%]*}}[[ARG1:%.+]], i16* {{[^%]*}}[[ARG2:%.+]], [10 x i32]* {{[^%]*}}[[ARG3:%.+]]) // CHECK: = alloca i32*, align // CHECK: = alloca i32*, align // CHECK: [[A_ADDR:%.+]] = alloca i32*, align diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 66a3d46955a..195f428e0fb 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -45,7 +45,7 @@ tx ftemplate(int n) { b[i] += 1; } -#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) num_threads(M) +#pragma omp target teams distribute parallel for collapse(2) firstprivate(f) private(k) for(int i = 0; i < M; i++) { for(int j = 0; j < M; j++) { k = M; diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index c508bc912fd..051ccfe9c40 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ 
b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -43,7 +43,7 @@ tx ftemplate(int n) { b[i] += 1; } -#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) num_threads(M) +#pragma omp target teams distribute parallel for simd collapse(2) firstprivate(f) private(k) for(int i = 0; i < M; i++) { for(int j = 0; j < M; j++) { k = M; |

