diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
commit | 2377fe95c6beeba5cbba356fee7fba32257bbce2 (patch) | |
tree | b853c3ba0847ca5371ae261d956726aa9d99223e /clang/test/OpenMP/parallel_codegen.cpp | |
parent | f054eca167ea834a168704ff7065a149d75e8f0f (diff) | |
download | bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.tar.gz bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.zip |
[OPENMP] Outlined function for parallel and other regions with list of captured variables.
Currently all variables used in OpenMP regions are captured into a record and passed to outlined functions in this record. It may result in some poor performance because of too complex analysis later in optimization passes. Patch makes to emit outlined functions for parallel-based regions with a list of captured variables. It reduces code for 2*n GEPs, stores and loads at least.
Codegen for task-based regions remains unchanged because runtime requires that all captured variables are passed in captured record.
llvm-svn: 247251
Diffstat (limited to 'clang/test/OpenMP/parallel_codegen.cpp')
-rw-r--r-- | clang/test/OpenMP/parallel_codegen.cpp | 78 |
1 files changed, 25 insertions, 53 deletions
diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index 37d16793574..307476a29a4 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -6,13 +6,9 @@ #ifndef HEADER #define HEADER // CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } -// CHECK-DAG: %struct.anon = type { i32* } -// CHECK-DAG: %struct.anon.0 = type { i8*** } // CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DEBUG-DAG: %ident_t = type { i32, i32, i32, i32, i8* } -// CHECK-DEBUG-DAG: %struct.anon = type { i32* } -// CHECK-DEBUG-DAG: %struct.anon.0 = type { i8*** } // CHECK-DEBUG-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" // CHECK-DEBUG-DAG: [[DEF_LOC_2:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } // CHECK-DEBUG-DAG: [[LOC1:@.+]] = private unnamed_addr constant [{{.+}} x i8] c";{{.*}}parallel_codegen.cpp;main;[[@LINE+14]];9;;\00" @@ -35,39 +31,30 @@ int main (int argc, char **argv) { } // CHECK-LABEL: define {{[a-z\_\b]*[ ]?i32}} @main({{i32[ ]?[a-z]*}} %argc, i8** %argv) -// CHECK: [[AGG_CAPTURED:%.+]] = alloca %struct.anon -// CHECK: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i32* {{%[a-z0-9.]+}}, i32** [[ARGC_REF]] -// CHECK-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK: store i32 %argc, i32* [[ARGC_ADDR:%.+]], +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]]) // CHECK-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // CHECK-NEXT: [[RET:%.+]] = call {{[a-z\_\b]*[ ]?i32}} [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) // CHECK-NEXT: ret i32 [[RET]] // CHECK-NEXT: } // CHECK-DEBUG-LABEL: define i32 @main(i32 %argc, i8** %argv) -// CHECK-DEBUG-DAG: [[AGG_CAPTURED:%.+]] = alloca %struct.anon -// CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %ident_t +// CHECK-DEBUG: [[LOC_2_ADDR:%.+]] = alloca %ident_t // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[DEF_LOC_2]] to i8* // CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[KMPC_LOC_VOIDPTR]], i8* [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i32 8, i1 false) -// CHECK-DEBUG: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store i32* {{%[a-z0-9.]+}}, i32** [[ARGC_REF]] -// CHECK-DEBUG-NEXT: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 +// CHECK-DEBUG: store i32 %argc, i32* [[ARGC_ADDR:%.+]], +// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC1]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] -// CHECK-DEBUG-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* -// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK-DEBUG: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]]) // CHECK-DEBUG-NEXT: [[ARGV:%.+]] = load i8**, i8*** {{%[a-z0-9.]+}} // CHECK-DEBUG-NEXT: [[RET:%.+]] = call i32 [[TMAIN:@.+tmain.+]](i8** [[ARGV]]) // CHECK-DEBUG-NEXT: ret i32 [[RET]] // CHECK-DEBUG-NEXT: } -// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context) -// CHECK: #[[FN_ATTRS:[0-9]+]] -// CHECK: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon* -// CHECK: store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]] -// CHECK: [[CONTEXT_PTR:%.+]] = load %struct.anon*, %struct.anon** [[CONTEXT_ADDR]] -// CHECK-NEXT: [[ARGC_PTR_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CONTEXT_PTR]], i32 0, i32 0 -// CHECK-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]] +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) [[ARGC_ADDR:%[^)]+]]) +// CHECK-SAME: #[[FN_ATTRS:[0-9]+]] +// CHECK: store i32* [[ARGC_ADDR]], i32** [[ARGC_PTR_ADDR:%.+]], +// CHECK-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_ADDR]] // CHECK-NEXT: [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]] // CHECK-NEXT: invoke {{.*}}void [[FOO:@.+foo.+]](i32{{[ ]?[a-z]*}} [[ARGC]]) // CHECK: call {{.+}} @__kmpc_cancel_barrier( @@ -75,13 +62,10 @@ int main (int argc, char **argv) { // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon* %__context) -// CHECK-DEBUG: #[[FN_ATTRS:[0-9]+]] -// CHECK-DEBUG: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon* -// CHECK-DEBUG: store %struct.anon* %__context, %struct.anon** [[CONTEXT_ADDR]] -// CHECK-DEBUG: [[CONTEXT_PTR:%.+]] = load %struct.anon*, %struct.anon** [[CONTEXT_ADDR]] -// CHECK-DEBUG-NEXT: [[ARGC_PTR_REF:%.+]] = getelementptr inbounds %struct.anon, %struct.anon* [[CONTEXT_PTR]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_REF]] +// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) [[ARGC_ADDR:%[^)]+]]) +// CHECK-DEBUG-SAME: #[[FN_ATTRS:[0-9]+]] +// CHECK-DEBUG: store i32* [[ARGC_ADDR]], i32** [[ARGC_PTR_ADDR:%.+]], +// CHECK-DEBUG: [[ARGC_REF:%.+]] = load i32*, i32** [[ARGC_PTR_ADDR]] // CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i32, i32* [[ARGC_REF]] // CHECK-DEBUG-NEXT: invoke void [[FOO:@.+foo.+]](i32 [[ARGC]]) // CHECK-DEBUG: call {{.+}} @__kmpc_cancel_barrier( @@ -96,34 +80,25 @@ int main (int argc, char **argv) { // CHECK-DEBUG-DAG: declare void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, ...)*, ...) // CHECK: define linkonce_odr {{[a-z\_\b]*[ ]?i32}} [[TMAIN]](i8** %argc) -// CHECK: [[AGG_CAPTURED:%.+]] = alloca %struct.anon.0 -// CHECK: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-NEXT: store i8*** {{%[a-z0-9.]+}}, i8**** [[ARGC_REF]] -// CHECK-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* -// CHECK-NEXT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], +// CHECK-NEXT: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC_2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) // CHECK-NEXT: ret i32 0 // CHECK-NEXT: } // CHECK-DEBUG: define linkonce_odr i32 [[TMAIN]](i8** %argc) -// CHECK-DEBUG-DAG: [[AGG_CAPTURED:%.+]] = alloca %struct.anon.0 // CHECK-DEBUG-DAG: [[LOC_2_ADDR:%.+]] = alloca %ident_t // CHECK-DEBUG: [[KMPC_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[LOC_2_ADDR]] to i8* // CHECK-DEBUG-NEXT: [[KMPC_DEFAULT_LOC_VOIDPTR:%.+]] = bitcast %ident_t* [[DEF_LOC_2]] to i8* // CHECK-DEBUG-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[KMPC_LOC_VOIDPTR]], i8* [[KMPC_DEFAULT_LOC_VOIDPTR]], i64 24, i32 8, i1 false) -// CHECK-DEBUG: [[ARGC_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store i8*** {{%[a-z0-9.]+}}, i8**** [[ARGC_REF]] -// CHECK-DEBUG-NEXT: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 +// CHECK-DEBUG-NEXT: store i8** %argc, i8*** [[ARGC_ADDR:%.+]], +// CHECK-DEBUG: [[KMPC_LOC_PSOURCE_REF:%.+]] = getelementptr inbounds %ident_t, %ident_t* [[LOC_2_ADDR]], i32 0, i32 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.+}} x i8], [{{.+}} x i8]* [[LOC2]], i32 0, i32 0), i8** [[KMPC_LOC_PSOURCE_REF]] -// CHECK-DEBUG-NEXT: [[BITCAST:%.+]] = bitcast %struct.anon.0* [[AGG_CAPTURED]] to i8* -// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon.0*)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8* [[BITCAST]]) +// CHECK-DEBUG-NEXT: call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[LOC_2_ADDR]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***)* [[OMP_OUTLINED:@.+]] to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]]) // CHECK-DEBUG-NEXT: ret i32 0 // CHECK-DEBUG-NEXT: } -// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context) -// CHECK: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0* -// CHECK: store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]] -// CHECK: [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]] -// CHECK-NEXT: [[ARGC_PTR_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[CONTEXT_PTR]], i32 0, i32 0 -// CHECK-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]] +// CHECK: define internal {{.*}}void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable(8) %argc) +// CHECK: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], +// CHECK-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // CHECK-NEXT: invoke {{.*}}void [[FOO1:@.+foo.+]](i8** [[ARGC]]) // CHECK: call {{.+}} @__kmpc_cancel_barrier( @@ -131,12 +106,9 @@ int main (int argc, char **argv) { // CHECK: call {{.*}}void @{{.+terminate.*|abort}}( // CHECK-NEXT: unreachable // CHECK-NEXT: } -// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* %.global_tid., i32* %.bound_tid., %struct.anon.0* %__context) -// CHECK-DEBUG: [[CONTEXT_ADDR:%.+]] = alloca %struct.anon.0* -// CHECK-DEBUG: store %struct.anon.0* %__context, %struct.anon.0** [[CONTEXT_ADDR]] -// CHECK-DEBUG: [[CONTEXT_PTR:%.+]] = load %struct.anon.0*, %struct.anon.0** [[CONTEXT_ADDR]] -// CHECK-DEBUG-NEXT: [[ARGC_PTR_REF:%.+]] = getelementptr inbounds %struct.anon.0, %struct.anon.0* [[CONTEXT_PTR]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_REF]] +// CHECK-DEBUG: define internal void [[OMP_OUTLINED]](i32* noalias %.global_tid., i32* noalias %.bound_tid., i8*** dereferenceable(8) %argc) +// CHECK-DEBUG: store i8*** %argc, i8**** [[ARGC_PTR_ADDR:%.+]], +// CHECK-DEBUG: [[ARGC_REF:%.+]] = load i8***, i8**** [[ARGC_PTR_ADDR]] // CHECK-DEBUG-NEXT: [[ARGC:%.+]] = load i8**, i8*** [[ARGC_REF]] // CHECK-DEBUG-NEXT: invoke void [[FOO1:@.+foo.+]](i8** [[ARGC]]) // CHECK-DEBUG: call {{.+}} @__kmpc_cancel_barrier( |