diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
commit | 2377fe95c6beeba5cbba356fee7fba32257bbce2 (patch) | |
tree | b853c3ba0847ca5371ae261d956726aa9d99223e /clang/test/OpenMP/parallel_for_linear_codegen.cpp | |
parent | f054eca167ea834a168704ff7065a149d75e8f0f (diff) | |
download | bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.tar.gz bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.zip |
[OPENMP] Outlined function for parallel and other regions with list of captured variables.
Currently, all variables used in OpenMP regions are captured into a record and passed to the outlined functions in this record. This may result in poor performance because the analysis required later in the optimization passes becomes too complex. This patch emits outlined functions for parallel-based regions with a list of captured variables instead. It reduces the generated code by at least 2*n GEPs, stores and loads.
Codegen for task-based regions remains unchanged because the runtime requires that all captured variables be passed in a captured record.
llvm-svn: 247251
Diffstat (limited to 'clang/test/OpenMP/parallel_for_linear_codegen.cpp')
-rw-r--r-- | clang/test/OpenMP/parallel_for_linear_codegen.cpp | 35 |
1 files changed, 14 insertions, 21 deletions
diff --git a/clang/test/OpenMP/parallel_for_linear_codegen.cpp b/clang/test/OpenMP/parallel_for_linear_codegen.cpp index 7d27efef7b6..d980dd34de2 100644 --- a/clang/test/OpenMP/parallel_for_linear_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_linear_codegen.cpp @@ -23,9 +23,7 @@ float f; char cnt; // CHECK: [[S_FLOAT_TY:%.+]] = type { float } -// CHECK: [[CAP_MAIN_TY:%.+]] = type { float**, i64* } // CHECK: [[S_INT_TY:%.+]] = type { i32 } -// CHECK: [[CAP_TMAIN_TY:%.+]] = type { i32**, i32* } // CHECK-DAG: [[IMPLICIT_BARRIER_LOC:@.+]] = private unnamed_addr constant %{{.+}} { i32 0, i32 66, i32 0, i32 0, i8* // CHECK-DAG: [[F:@.+]] = global float 0.0 // CHECK-DAG: [[CNT:@.+]] = global i8 0 @@ -48,10 +46,10 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}}) + // LAMBDA: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]]) #pragma omp parallel for linear(g:5) for (int i = 0; i < 2; ++i) { - // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, @@ -60,7 +58,6 @@ int main() { // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: alloca i{{[0-9]+}}, // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, - // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], // LAMBDA: store i32 0, // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] @@ -97,10 +94,10 @@ int main() { // BLOCKS: call void {{%.+}}(i8 ^{ // BLOCKS: define{{.*}} internal{{.*}} void 
{{.+}}(i8* - // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i8* %{{.+}}) + // BLOCKS: call void {{.+}} @__kmpc_fork_call({{.+}}, i32 1, {{.+}}* [[OMP_REGION:@.+]] to {{.+}}, i32* [[G]]) #pragma omp parallel for linear(g:5) for (int i = 0; i < 2; ++i) { - // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* %{{.+}}, i32* %{{.+}}, %{{.+}}* [[ARG:%.+]]) + // BLOCKS: define{{.*}} internal{{.*}} void [[OMP_REGION]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, i32* dereferenceable(4) %{{.+}}) // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[G_START_ADDR:%.+]] = alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, @@ -109,7 +106,6 @@ int main() { // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: alloca i{{[0-9]+}}, // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}, - // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]], // BLOCKS: store i32 0, // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}} // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]] @@ -156,13 +152,12 @@ int main() { // CHECK: define i{{[0-9]+}} @main() // CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]], // CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]]) -// CHECK: %{{.+}} = bitcast [[CAP_MAIN_TY]]* -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_MAIN_TY]]*)* [[MAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 2, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, float**, i64*)* [[MAIN_MICROTASK:@.+]] to void // CHECK: = call {{.+}} [[TMAIN_INT:@.+]]() // CHECK: call void [[S_FLOAT_TY_DESTR:@.+]]([[S_FLOAT_TY]]* // CHECK: ret -// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_MAIN_TY]]* %{{.+}}) +// CHECK: define internal void [[MAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, float** dereferenceable(8) %{{.+}}, i64* dereferenceable(8) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca float*, // CHECK: [[LVAR_START:%.+]] = alloca i64, @@ -175,13 +170,12 @@ int main() { // CHECK: [[LVAR_PRIV:%.+]] = alloca i64, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK: [[PVAR_REF:%.+]] = load float**, float*** % +// CHECK: [[LVAR_REF:%.+]] = load i64*, i64** % + // Check for default initialization. 
-// CHECK: [[PVAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[PVAR_REF:%.+]] = load float**, float*** [[PVAR_PTR_REF]], // CHECK: [[PVAR_VAL:%.+]] = load float*, float** [[PVAR_REF]], // CHECK: store float* [[PVAR_VAL]], float** [[PVAR_START]], -// CHECK: [[LVAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[LVAR_REF:%.+]] = load i64*, i64** [[LVAR_PTR_REF]], // CHECK: [[LVAR_VAL:%.+]] = load i64, i64* [[LVAR_REF]], // CHECK: store i64 [[LVAR_VAL]], i64* [[LVAR_START]], // CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID:%.+]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) @@ -210,11 +204,11 @@ int main() { // CHECK: define {{.*}} i{{[0-9]+}} [[TMAIN_INT]]() // CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], // CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]]) -// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 1, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, [[CAP_TMAIN_TY]]*)* [[TMAIN_MICROTASK:@.+]] to void +// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) 
@__kmpc_fork_call(%{{.+}}* @{{.+}}, i{{[0-9]+}} 2, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*, i32**, i32*)* [[TMAIN_MICROTASK:@.+]] to void // CHECK: call void [[S_INT_TY_DESTR:@.+]]([[S_INT_TY]]* // CHECK: ret // -// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* [[GTID_ADDR:%.+]], i{{[0-9]+}}* %{{.+}}, [[CAP_TMAIN_TY]]* %{{.+}}) +// CHECK: define internal void [[TMAIN_MICROTASK]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i32** dereferenceable(8) %{{.+}}, i32* dereferenceable(4) %{{.+}}) // CHECK: alloca i{{[0-9]+}}, // CHECK: [[PVAR_START:%.+]] = alloca i32*, // CHECK: [[LVAR_START:%.+]] = alloca i32, @@ -227,13 +221,12 @@ int main() { // CHECK: [[LVAR_PRIV:%.+]] = alloca i32, // CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]] +// CHECK: [[PVAR_REF:%.+]] = load i32**, i32*** % +// CHECK: [[LVAR_REF:%.+]] = load i32*, i32** % + // Check for default initialization. -// CHECK: [[PVAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[PVAR_REF:%.+]] = load i32**, i32*** [[PVAR_PTR_REF]], // CHECK: [[PVAR_VAL:%.+]] = load i32*, i32** [[PVAR_REF]], // CHECK: store i32* [[PVAR_VAL]], i32** [[PVAR_START]], -// CHECK: [[LVAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[LVAR_REF:%.+]] = load i32*, i32** [[LVAR_PTR_REF]], // CHECK: [[LVAR_VAL:%.+]] = load i32, i32* [[LVAR_REF]], // CHECK: store i32 [[LVAR_VAL]], i32* [[LVAR_START]], // CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID:%.+]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1) |