summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCarlo Bertolli <cbertol@us.ibm.com>2017-01-03 18:24:42 +0000
committerCarlo Bertolli <cbertol@us.ibm.com>2017-01-03 18:24:42 +0000
commit962bb807ecf08e27c0d4b4c4e08fcf10d4446a5d (patch)
tree134a1214e8ed5c4b7dcbf9b16bdb8750a439a05f
parent21c12044d29b8bd873c3f9803f441e79560f1742 (diff)
downloadbcm5719-llvm-962bb807ecf08e27c0d4b4c4e08fcf10d4446a5d.tar.gz
bcm5719-llvm-962bb807ecf08e27c0d4b4c4e08fcf10d4446a5d.zip
[OPENMP] Private, firstprivate, and lastprivate clauses for distribute, host code generation
https://reviews.llvm.org/D17840 This patch enables private, firstprivate, and lastprivate clauses for the OpenMP distribute directive. Regression tests differ from the similar case of the same clauses on the for directive, by removing a reference to two global variables g and g1. This is necessary because: 1. a distribute pragma is only allowed inside a target region; 2. referring a global variable (e.g. g and g1) in a target region requires the program to enclose the variable in a "declare target" region; 3. declare target pragmas, which are used to define a declare target region, are currently unavailable in clang (patch being prepared). For this reason, I moved the global declarations into local variables. llvm-svn: 290898
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp18
-rw-r--r--clang/test/OpenMP/distribute_lastprivate_codegen.cpp379
-rw-r--r--clang/test/OpenMP/distribute_private_codegen.cpp206
3 files changed, 603 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index ad4357af7c5..386c4f0fe69 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2770,6 +2770,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto &RT = CGM.getOpenMPRuntime();
+ bool HasLastprivateClause = false;
// Check pre-condition.
{
OMPLoopScope PreInitScope(*this, S);
@@ -2803,6 +2804,16 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
OMPPrivateScope LoopScope(*this);
+ if (EmitOMPFirstprivateClause(S, LoopScope)) {
+ // Emit implicit barrier to synchronize threads and avoid data races on
+ // initialization of firstprivate variables and post-update of
+ // lastprivate variables.
+ CGM.getOpenMPRuntime().emitBarrierCall(
+ *this, S.getLocStart(), OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ }
+ EmitOMPPrivateClause(S, LoopScope);
+ HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
EmitOMPPrivateLoopCounters(S, LoopScope);
(void)LoopScope.Privatize();
@@ -2859,6 +2870,13 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
LB.getAddress(), UB.getAddress(), ST.getAddress(),
IL.getAddress(), Chunk);
}
+
+ // Emit final copy of the lastprivate variables if IsLastIter != 0.
+ if (HasLastprivateClause)
+ EmitOMPLastprivateClauseFinal(
+ S, /*NoFinals=*/false,
+ Builder.CreateIsNotNull(
+ EmitLoadOfScalar(IL, S.getLocStart())));
}
// We're now done with the loop, so jump to the continuation block.
diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
new file mode 100644
index 00000000000..0f596defbfe
--- /dev/null
+++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp
@@ -0,0 +1,379 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+ T f;
+ S(T a) : f(a) {}
+ S() : f() {}
+ operator T() { return T(); }
+ ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+ S<T> test;
+ T t_var = T();
+ T vec[] = {1, 2};
+ S<T> s_arr[] = {1, 2};
+ S<T> &var = test;
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute lastprivate(t_var, vec, s_arr, s_arr, var, var)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return T();
+}
+
+int main() {
+ static int svar;
+ volatile double g;
+ volatile double &g1 = g;
+
+ #ifdef LAMBDA
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ static float sfvar;
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+ // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+ // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]](
+ // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 4, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute lastprivate(g, g1, svar, sfvar)
+ for (int i = 0; i < 2; ++i) {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}}, double*{{.+}} [[G_IN:%.+]], double*{{.+}} [[G1_IN:%.+]], i{{[0-9]+}}*{{.+}} [[SVAR_IN:%.+]], float*{{.+}} [[SFVAR_IN:%.+]])
+ // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double*,
+ // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double*,
+ // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+ // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float*,
+ // LAMBDA: [[TMP_G1:%.+]] = alloca double*,
+ // loop variables
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: {{.+}} = alloca i{{[0-9]+}},
+ // LAMBDA: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[G_PRIVATE:%.+]] = alloca double,
+ // LAMBDA: [[G1_PRIVATE:%.+]] = alloca double,
+ // LAMBDA: [[TMP_G1_PRIVATE:%.+]] = alloca double*,
+ // LAMBDA: [[SVAR_PRIVATE:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[SFVAR_PRIVATE:%.+]] = alloca float,
+ // LAMBDA: store double* [[G_IN]], double** [[G_PRIVATE_ADDR]],
+ // LAMBDA: store double* [[G1_IN]], double** [[G1_PRIVATE_ADDR]],
+ // LAMBDA: store i{{[0-9]+}}* [[SVAR_IN]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+ // LAMBDA: store float* [[SFVAR_IN]], float** [[SFVAR_PRIVATE_ADDR]],
+
+ // init private variables
+ // LAMBDA: [[G_IN_REF:%.+]] = load double*, double** [[G_PRIVATE_ADDR]],
+ // LAMBDA: [[SVAR_IN_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR]],
+ // LAMBDA: [[SFVAR_IN_REF:%.+]] = load float*, float** [[SFVAR_PRIVATE_ADDR]],
+ // LAMBDA: [[G1_IN_REF:%.+]] = load double*, double** [[G1_PRIVATE_ADDR]],
+ // LAMBDA: store double* [[G1_IN_REF]], double** [[TMP_G1]],
+ // LAMBDA: [[TMP_G1_VAL:%.+]] = load double*, double** [[TMP_G1]],
+ // LAMBDA: store double* [[G1_PRIVATE]], double** [[TMP_G1_PRIVATE]],
+ g = 1;
+ g1 = 1;
+ svar = 3;
+ sfvar = 4.0;
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+ // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE]],
+ // LAMBDA: [[TMP_G1_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+ // LAMBDA: store{{.+}} double 1.0{{.+}}, double* [[TMP_G1_REF]],
+ // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+ // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE]],
+ // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store double* [[G_PRIVATE]], double** [[G_PRIVATE_ADDR_REF]],
+ // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+ // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+ // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+ // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+ // LAMBDA: store float* [[SFVAR_PRIVATE]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+ // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+ // LAMBDA: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+ // LAMBDA: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+ // LAMBDA: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+ // LAMBDA: [[OMP_LASTPRIV_BLOCK]]:
+ // LAMBDA: [[G_PRIV_VAL:%.+]] = load double, double* [[G_PRIVATE]],
+ // LAMBDA: store{{.*}} double [[G_PRIV_VAL]], double* [[G_IN_REF]],
+ // LAMBDA: [[TMP_G1_PRIV_REF:%.+]] = load double*, double** [[TMP_G1_PRIVATE]],
+ // LAMBDA: [[TMP_G1_PRIV_VAL:%.+]] = load double, double* [[TMP_G1_PRIV_REF]],
+ // LAMBDA: store{{.*}} double [[TMP_G1_PRIV_VAL]], double* [[TMP_G1_VAL]],
+
+ // LAMBDA: [[SVAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[SVAR_PRIVATE]],
+ // LAMBDA: store i{{[0-9]+}} [[SVAR_PRIV_VAL]], i{{[0-9]+}}* [[SVAR_IN_REF]],
+ // LAMBDA: [[SFVAR_PRIV_VAL:%.+]] = load float, float* [[SFVAR_PRIVATE]],
+ // LAMBDA: store float [[SFVAR_PRIV_VAL]], float* [[SFVAR_IN_REF]],
+ // LAMBDA: br label %[[OMP_LASTPRIV_DONE]]
+ // LAMBDA: [[OMP_LASTPRIV_DONE]]:
+ // LAMBDA: ret
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ g1 = 2;
+ svar = 4;
+ sfvar = 8.0;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+ // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+ // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+ // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+ // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+ // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+ // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+ // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+ }();
+ }
+ }();
+ return 0;
+ #else
+ S<float> test;
+ int t_var = 0;
+ int vec[] = {1, 2};
+ S<float> s_arr[] = {1, 2};
+ S<float> &var = test;
+
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute lastprivate(t_var, vec, s_arr, s_arr, var, var, svar)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ int i;
+
+ return tmain<int>();
+ #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_FLOAT_TY]]]* {{.+}}, [[S_FLOAT_TY]]* {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]*{{.+}} {{.+}}, [2 x [[S_FLOAT_TY]]]*{{.+}} {{.+}}, [[S_FLOAT_TY]]*{{.+}} {{.+}}, i{{[0-9]+}} {{.+}})
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN:%.+]], [2 x [[S_FLOAT_TY]]]*{{.+}} [[S_ARR_IN:%.+]], [[S_FLOAT_TY]]*{{.+}} [[VAR_IN:%.+]], i{{[0-9]+}}*{{.*}} [[S_VAR_IN:%.+]])
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_FLOAT_TY]]]*,
+// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[SVAR_ADDR:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[TMP:%.*]] = alloca [[S_FLOAT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: [[TMP_PRIV:%.+]] = alloca [[S_FLOAT_TY]]*,
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+
+// copy from parameters to local address variables
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN]], i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN]], [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: store [2 x [[S_FLOAT_TY]]]* [[S_ARR_IN]], [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_IN]], [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store i{{[0-9]+}}* [[S_VAR_IN]], i{{[0-9]+}}** [[SVAR_ADDR]],
+
+// load content of local address variables
+// CHECK: [[T_VAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR]],
+// CHECK: [[VEC_ADDR_REF:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR]],
+// CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_FLOAT_TY]]]*, [2 x [[S_FLOAT_TY]]]** [[S_ARR_ADDR]],
+// CHECK: [[SVAR_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_ADDR]],
+// CHECK: [[VAR_ADDR_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[VAR_ADDR]],
+// CHECK: store [[S_FLOAT_TY]]* [[VAR_ADDR_REF]], [[S_FLOAT_TY]]** [[TMP]],
+// CHECK: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[TMP_REF:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP]],
+// the distribute loop
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// the following is extremely weak, because it assumes ordering of this load with no reference after the call to static_init: fix!!!!
+// CHECK: [[IV_VAL:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: [[VEC_PTR:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL]], i{{[0-9]+}}* [[VEC_PTR]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]],
+// CHECK-DAG: [[TMP_VAL:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_PTR]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST]], i8* [[TMP_VAL_BCAST]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_FLOAT_TY]]], [2 x [[S_FLOAT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_FLOAT_TY]]]* [[S_ARR_PRIV]] to [[S_FLOAT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_FLOAT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_FLOAT_TY]], [[S_FLOAT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_FLOAT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_FLOAT_TY]]*, [[S_FLOAT_TY]]** [[TMP_PRIV]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_FLOAT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: [[SVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[S_VAR_PRIV]],
+// CHECK: store i{{[0-9]+}} [[SVAR_VAL]], i{{[0-9]+}}* [[SVAR_ADDR_REF]],
+// CHECK: ret void
+
+// template tmain
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](i{{[0-9]+}} {{.+}}, [2 x i{{[0-9]+}}]* {{.+}}, [2 x [[S_INT_TY]]]* {{.+}}, [[S_INT_TY]]* {{.+}})
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]](
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 4,
+// CHECK: ret
+
+// CHECK: define internal void [[OMP_OUTLINED_1:@.+]](i{{[0-9]+}}* noalias [[GTID_ADDR1:%.+]], i{{[0-9]+}}* noalias %{{.+}}, i{{[0-9]+}}*{{.+}} [[T_VAR_IN1:%.+]], [2 x i{{[0-9]+}}]*{{.+}} [[VEC_IN1:%.+]], [2 x [[S_INT_TY]]]*{{.+}} [[S_ARR_IN1:%.+]], [[S_INT_TY]]*{{.+}} [[VAR_IN1:%.+]])
+// skip alloca of global_tid and bound_tid
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: {{.+}} = alloca i{{[0-9]+}}*,
+// CHECK: [[T_VAR_ADDR1:%.+]] = alloca i{{[0-9]+}}*,
+// CHECK: [[VEC_ADDR1:%.+]] = alloca [2 x i{{[0-9]+}}]*,
+// CHECK: [[S_ARR_ADDR1:%.+]] = alloca [2 x [[S_INT_TY]]]*,
+// CHECK: [[VAR_ADDR1:%.+]] = alloca [[S_INT_TY]]*,
+// CHECK: [[TMP:%.+]] = alloca [[S_INT_TY]]*,
+// skip loop variables
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: {{.+}} = alloca i{{[0-9]+}},
+// CHECK: [[OMP_IS_LAST1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[T_VAR_PRIV1:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV1:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV1:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV1:%.+]] = alloca [[S_INT_TY]],
+// CHECK: [[TMP_PRIV1:%.+]] = alloca [[S_INT_TY]]*,
+
+// skip init of bound and global tid
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// CHECK: store i{{[0-9]+}}* {{.*}},
+// copy from parameters to local address variables
+// CHECK: store i{{[0-9]+}}* [[T_VAR_IN1]], i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: store [2 x i{{[0-9]+}}]* [[VEC_IN1]], [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: store [2 x [[S_INT_TY]]]* [[S_ARR_IN1]], [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: store [[S_INT_TY]]* [[VAR_IN1]], [[S_INT_TY]]** [[VAR_ADDR1]],
+
+// load content of local address variables
+// CHECK: [[T_VAR_ADDR_REF1:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[T_VAR_ADDR1]],
+// CHECK: [[VEC_ADDR_REF1:%.+]] = load [2 x i{{[0-9]+}}]*, [2 x i{{[0-9]+}}]** [[VEC_ADDR1]],
+// CHECK: [[S_ARR_ADDR_REF1:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR1]],
+// CHECK: [[VAR_ADDR1_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[VAR_ADDR1]],
+// CHECK-DAG: store [[S_INT_TY]]* [[VAR_ADDR1_REF]], [[S_INT_TY]]** [[TMP]],
+// CHECK-DAG: store i{{[0-9]+}} 0, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK-DAG: [[TMP_REF:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP]],
+// CHECK: call void @__kmpc_for_static_init_4(
+// assignment: vec[i] = t_var;
+// CHECK: [[IV_VAL1:%.+]] =
+// CHECK: [[T_VAR_PRIV_VAL1:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: [[VEC_PTR1:%.+]] = getelementptr inbounds [2 x i{{[0-9]+}}], [2 x i{{[0-9]+}}]* [[VEC_PRIV1]], i{{[0-9]+}} 0, i{{[0-9]+}} {{.+}}
+// CHECK: store i{{[0-9]+}} [[T_VAR_PRIV_VAL1]], i{{[0-9]+}}* [[VEC_PTR1]],
+
+// assignment: s_arr[i] = var;
+// CHECK-DAG: [[S_ARR_PTR1:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]],
+// CHECK-DAG: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK-DAG: [[S_ARR_PTR_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_PTR1]] to i8*
+// CHECK-DAG: [[TMP_VAL_BCAST1:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK-DAG: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_PTR_BCAST1]], i8* [[TMP_VAL_BCAST1]],
+// CHECK: call void @__kmpc_for_static_fini(
+
+// lastprivates
+// CHECK: [[OMP_IS_LAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[OMP_IS_LAST1]],
+// CHECK: [[IS_LAST_IT:%.+]] = icmp ne i{{[0-9]+}} [[OMP_IS_LAST_VAL]], 0
+// CHECK: br i1 [[IS_LAST_IT]], label %[[OMP_LASTPRIV_BLOCK:.+]], label %[[OMP_LASTPRIV_DONE:.+]]
+
+// CHECK: [[OMP_LASTPRIV_BLOCK]]:
+// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[T_VAR_PRIV1]],
+// CHECK: store i{{[0-9]+}} [[T_VAR_VAL]], i{{[0-9]+}}* [[T_VAR_ADDR_REF1]],
+// CHECK: [[BCAST_VEC_ADDR_REF:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_REF1]] to i8*
+// CHECK: [[BCAST_VEC_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[BCAST_VEC_ADDR_REF]], i8* [[BCAST_VEC_PRIV]],
+// CHECK: [[S_ARR_BEGIN:%.+]] = getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[S_ARR_ADDR_REF]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+// CHECK: [[S_ARR_PRIV_BCAST:%.+]] = bitcast [2 x [[S_INT_TY]]]* [[S_ARR_PRIV1]] to [[S_INT_TY]]*
+// CHECK: [[S_ARR_BEGIN_GEP:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_BEGIN]], i{{[0-9]+}} 2
+// CHECK: [[S_ARR_IS_EMPTY:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_BEGIN]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[S_ARR_IS_EMPTY]], label %[[S_ARR_COPY_DONE:.+]], label %[[S_ARR_COPY_BLOCK:.+]]
+// CHECK: [[S_ARR_COPY_BLOCK]]:
+// CHECK: [[S_ARR_SRC_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_EL:%.+]] = phi [[S_INT_TY]]*{{.+}}
+// CHECK: [[S_ARR_DST_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_DST_EL]] to i8*
+// CHECK: [[S_ARR_SRC_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[S_ARR_SRC_EL]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[S_ARR_DST_BCAST]], i8* [[S_ARR_SRC_BCAST]]{{.+}})
+// CHECK: [[S_ARR_DST_NEXT:%.+]] = getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_DST_EL]], i{{[0-9]+}} 1
+// CHECK: [[S_ARR_SRC_NEXT:%.+]] = getelementptr{{.+}}
+// CHECK: [[CPY_IS_FINISHED:%.+]] = icmp eq [[S_INT_TY]]* [[S_ARR_DST_NEXT]], [[S_ARR_BEGIN_GEP]]
+// CHECK: br i1 [[CPY_IS_FINISHED]], label %[[S_ARR_COPY_DONE]], label %[[S_ARR_COPY_BLOCK]]
+// CHECK: [[S_ARR_COPY_DONE]]:
+// CHECK: [[TMP_VAL1:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[TMP_PRIV1]],
+// CHECK: [[VAR_ADDR_REF_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_REF]] to i8*
+// CHECK: [[TMP_VAL1_BCAST:%.+]] = bitcast [[S_INT_TY]]* [[TMP_VAL1]] to i8*
+// CHECK: call void @llvm.memcpy.{{.+}}(i8* [[VAR_ADDR_REF_BCAST]], i8* [[TMP_VAL1_BCAST]],{{.+}})
+// CHECK: ret void
+#endif
diff --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp
new file mode 100644
index 00000000000..c637fb48924
--- /dev/null
+++ b/clang/test/OpenMP/distribute_private_codegen.cpp
@@ -0,0 +1,206 @@
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-64
+// RUN: %clang_cc1 -DLAMBDA -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -DLAMBDA -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix LAMBDA --check-prefix LAMBDA-32
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+template <class T>
+struct S {
+ T f;
+ S(T a) : f(a) {}
+ S() : f() {}
+ operator T() { return T(); }
+ ~S() {}
+};
+
+// CHECK: [[S_FLOAT_TY:%.+]] = type { float }
+// CHECK: [[S_INT_TY:%.+]] = type { i{{[0-9]+}} }
+template <typename T>
+T tmain() {
+ S<T> test;
+ T t_var = T();
+ T vec[] = {1, 2};
+ S<T> s_arr[] = {1, 2};
+ S<T> &var = test;
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute private(t_var, vec, s_arr, s_arr, var, var)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ return T();
+}
+
+int main() {
+ static int svar;
+ volatile double g;
+ volatile double &g1 = g;
+
+ #ifdef LAMBDA
+ // LAMBDA-LABEL: @main
+ // LAMBDA: call{{.*}} void [[OUTER_LAMBDA:@.+]](
+ [&]() {
+ static float sfvar;
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]](
+ // LAMBDA: call i{{[0-9]+}} @__tgt_target_teams(
+ // LAMBDA: call void [[OFFLOADING_FUN:@.+]](
+
+ // LAMBDA: define{{.+}} void [[OFFLOADING_FUN]]()
+ // LAMBDA: call {{.*}}void {{.+}} @__kmpc_fork_teams({{.+}}, i32 0, {{.+}}* [[OMP_OUTLINED:@.+]] to {{.+}})
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute private(g, g1, svar, sfvar)
+ for (int i = 0; i < 2; ++i) {
+ // LAMBDA: define{{.*}} internal{{.*}} void [[OMP_OUTLINED]](i32* noalias %{{.+}}, i32* noalias %{{.+}})
+ // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca double,
+ // LAMBDA: [[G1_PRIVATE_ADDR:%.+]] = alloca double,
+ // LAMBDA: [[TMP_PRIVATE_ADDR:%.+]] = alloca double*,
+ // LAMBDA: [[SVAR_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
+ // LAMBDA: [[SFVAR_PRIVATE_ADDR:%.+]] = alloca float,
+ // LAMBDA: store double* [[G1_PRIVATE_ADDR]], double** [[TMP_PRIVATE_ADDR]],
+ g = 1;
+ g1 = 1;
+ svar = 3;
+ sfvar = 4.0;
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_init_4(
+ // LAMBDA: store double 1.0{{.+}}, double* [[G_PRIVATE_ADDR]],
+ // LAMBDA: store i{{[0-9]+}} 3, i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]],
+ // LAMBDA: store float 4.0{{.+}}, float* [[SFVAR_PRIVATE_ADDR]],
+ // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: store double* [[G_PRIVATE_ADDR]], double** [[G_PRIVATE_ADDR_REF]],
+ // LAMBDA: [[TMP_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_PRIVATE_ADDR_FROM_TMP:%.+]] = load double*, double** [[TMP_PRIVATE_ADDR]],
+ // LAMBDA: store double* [[G1_PRIVATE_ADDR_FROM_TMP]], double** [[TMP_PRIVATE_ADDR_REF]],
+ // LAMBDA: [[SVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: store i{{[0-9]+}}* [[SVAR_PRIVATE_ADDR]], i{{[0-9]+}}** [[SVAR_PRIVATE_ADDR_REF]]
+ // LAMBDA: [[SFVAR_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+ // LAMBDA: store float* [[SFVAR_PRIVATE_ADDR]], float** [[SFVAR_PRIVATE_ADDR_REF]]
+ // LAMBDA: call{{.*}} void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
+ // LAMBDA: call {{.*}}void @__kmpc_for_static_fini(
+ [&]() {
+ // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
+ // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]],
+ g = 2;
+ g1 = 2;
+ svar = 4;
+ sfvar = 8.0;
+ // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]]
+ // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
+ // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]]
+ // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]]
+
+ // LAMBDA: [[TMP_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1
+ // LAMBDA: [[G1_REF:%.+]] = load double*, double** [[TMP_PTR_REF]]
+ // LAMBDA: store double 2.0{{.+}}, double* [[G1_REF]],
+ // LAMBDA: [[SVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2
+ // LAMBDA: [[SVAR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[SVAR_PTR_REF]]
+ // LAMBDA: store i{{[0-9]+}} 4, i{{[0-9]+}}* [[SVAR_REF]]
+ // LAMBDA: [[SFVAR_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 3
+ // LAMBDA: [[SFVAR_REF:%.+]] = load float*, float** [[SFVAR_PTR_REF]]
+ // LAMBDA: store float 8.0{{.+}}, float* [[SFVAR_REF]]
+ }();
+ }
+ }();
+ return 0;
+ #else
+ S<float> test;
+ int t_var = 0;
+ int vec[] = {1, 2};
+ S<float> s_arr[] = {1, 2};
+ S<float> &var = test;
+
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute private(t_var, vec, s_arr, s_arr, var, var, svar)
+ for (int i = 0; i < 2; ++i) {
+ vec[i] = t_var;
+ s_arr[i] = var;
+ }
+ int i;
+
+ #pragma omp target
+ #pragma omp teams
+#pragma omp distribute private(i)
+ for (i = 0; i < 2; ++i) {
+ ;
+ }
+ return tmain<int>();
+ #endif
+}
+
+// CHECK: define{{.*}} i{{[0-9]+}} @main()
+// CHECK: [[TEST:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR:@.+]]([[S_FLOAT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN:@.+]](
+// CHECK: ret
+
+// CHECK: define{{.+}} [[OFFLOAD_FUN]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_FLOAT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
+// CHECK-NOT: alloca [[S_FLOAT_TY]],
+// CHECK: [[S_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_FLOAT_TY]]*
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_FLOAT_TY_DEF_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+
+// CHECK: define{{.*}} i{{[0-9]+}} [[TMAIN_INT:@.+]]()
+// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]],
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR:@.+]]([[S_INT_TY]]* [[TEST]])
+// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
+// CHECK: call void [[OFFLOAD_FUN_1:@.+]](
+// CHECK: ret
+
+
+// CHECK: define internal void [[OFFLOAD_FUN_1]]()
+// CHECK: call void (%{{.+}}*, i{{[0-9]+}}, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)*, ...) @__kmpc_fork_teams(%{{.+}}* @{{.+}}, i{{[0-9]+}} 0, void (i{{[0-9]+}}*, i{{[0-9]+}}*, ...)* bitcast (void (i{{[0-9]+}}*, i{{[0-9]+}}*)* [[OMP_OUTLINED_1:@.+]] to void
+// CHECK: ret
+//
+// CHECK: define internal void [[OMP_OUTLINED_1]](i{{[0-9]+}}* noalias [[GTID_ADDR:%.+]], i{{[0-9]+}}* noalias %{{.+}})
+// CHECK: [[T_VAR_PRIV:%.+]] = alloca i{{[0-9]+}},
+// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
+// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
+// CHECK-NOT: alloca [2 x [[S_INT_TY]]],
+// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
+// CHECK-NOT: alloca [[S_INT_TY]],
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_REF:%.+]]
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: {{.+}}:
+// CHECK: [[S_ARR_PRIV_ITEM:%.+]] = phi [[S_INT_TY]]*
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[S_ARR_PRIV_ITEM]])
+// CHECK-NOT: [[T_VAR_PRIV]]
+// CHECK-NOT: [[VEC_PRIV]]
+// CHECK: call {{.*}} [[S_INT_TY_DEF_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]])
+// CHECK: call void @__kmpc_for_static_init_4(
+// CHECK: call void @__kmpc_for_static_fini(
+// CHECK: ret void
+#endif
OpenPOWER on IntegriCloud