summaryrefslogtreecommitdiffstats
path: root/clang/test/OpenMP/declare_target_codegen_globalization.cpp
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2018-03-15 18:10:54 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2018-03-15 18:10:54 +0000
commitc99042ba973be37d9a5bcbb258f83251bf36b673 (patch)
tree1a044cc4c4e89e1b16fe04cd607ad1c76ccb8578 /clang/test/OpenMP/declare_target_codegen_globalization.cpp
parent3dbc362fe295d9c5855223abbd4c7f4e125be614 (diff)
downloadbcm5719-llvm-c99042ba973be37d9a5bcbb258f83251bf36b673.tar.gz
bcm5719-llvm-c99042ba973be37d9a5bcbb258f83251bf36b673.zip
[OPENMP, NVPTX] Improve globalization of the variables captured by value.
If the variable is captured by value and the corresponding parameter in the outlined function escapes its declaration context, this parameter must be globalized. To globalize it we need to get the address of the original parameter, load the value, store it to the global address and use this global address instead of the original. Patch improves globalization for parallel|teams regions + functions in declare target regions. llvm-svn: 327654
Diffstat (limited to 'clang/test/OpenMP/declare_target_codegen_globalization.cpp')
-rw-r--r--clang/test/OpenMP/declare_target_codegen_globalization.cpp44
1 files changed, 44 insertions, 0 deletions
diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
new file mode 100644
index 00000000000..db0df9bd930
--- /dev/null
+++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+int foo(int &a) { return a; }
+
+int bar() {
+ int a;
+ return foo(a);
+}
+
+// CHECK: define void @__omp_offloading_{{.*}}maini1{{.*}}_l[[@LINE+5]](i32* dereferenceable{{.*}})
+// CHECK-NOT: @__kmpc_data_sharing_push_stack
+
+int maini1() {
+ int a;
+#pragma omp target parallel map(from:a)
+ {
+ int b;
+ a = foo(b) + bar();
+ }
+ return a;
+}
+
+// parallel region
+// CHECK: define {{.*}}void @{{.*}}(i32* noalias {{.*}}, i32* noalias {{.*}}, i32* dereferenceable{{.*}})
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
+// CHECK: [[B_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}}[[FOO:@.*foo.*]](i32* dereferenceable{{.*}} [[B_ADDR]])
+// CHECK: call {{.*}}[[BAR:@.*bar.*]]()
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: ret void
+
+// CHECK: define {{.*}}[[FOO]](i32* dereferenceable{{.*}})
+// CHECK-NOT: @__kmpc_data_sharing_push_stack
+
+// CHECK: define {{.*}}[[BAR]]()
+// CHECK: [[RES:%.+]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 0)
+// CHECK: [[GLOBALS:%.+]] = bitcast i8* [[RES]] to [[GLOBAL_ST:%struct[.].*]]*
+// CHECK: [[A_ADDR:%.+]] = getelementptr inbounds [[GLOBAL_ST]], [[GLOBAL_ST]]* [[GLOBALS]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: call {{.*}}[[FOO]](i32* dereferenceable{{.*}} [[A_ADDR]])
+// CHECK: call void @__kmpc_data_sharing_pop_stack(i8* [[RES]])
+// CHECK: ret i32
OpenPOWER on IntegriCloud