summaryrefslogtreecommitdiffstats
path: root/clang
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2015-05-07 04:25:17 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2015-05-07 04:25:17 +0000
commit53223c986cfbc16179dc82353812038ce3362900 (patch)
tree231373c939510933031b4aa8f165d32d317d50a9 /clang
parent4fce73aabf7f17ce9a43ba55d7158bc3da129487 (diff)
downloadbcm5719-llvm-53223c986cfbc16179dc82353812038ce3362900.tar.gz
bcm5719-llvm-53223c986cfbc16179dc82353812038ce3362900.zip
[OPENMP] Generate !llvm.mem.loop_parallel_access metadata for loops with dynamic/guided scheduling.
Inner bodies of OpenMP worksharing loop-based constructs with dynamic or guided scheduling are allowed to be marked with !llvm.mem.parallel_loop_access metadata for better optimization. Worksharing constructs with static scheduling cannot be marked this way (according to OpenMP standard "A data dependence between the same logical iterations in two such loops is guaranteed"). Constructs with auto and runtime scheduling are also not marked because automatically chosen scheduling may be static also. Differential Revision: http://reviews.llvm.org/D9518 llvm-svn: 236693
Diffstat (limited to 'clang')
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp5
-rw-r--r--clang/test/OpenMP/for_codegen.cpp9
-rw-r--r--clang/test/OpenMP/ordered_codegen.cpp4
3 files changed, 16 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 1f06e244630..21b1bcb9d3a 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -886,6 +886,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
bool DynamicWithOrderedClause =
Dynamic && S.getSingleClause(OMPC_ordered) != nullptr;
SourceLocation Loc = S.getLocStart();
+ // Generate !llvm.loop.parallel metadata for loads and stores for loops with
+ // dynamic/guided scheduling and without ordered clause.
+ LoopStack.setParallel((ScheduleKind == OMPC_SCHEDULE_dynamic ||
+ ScheduleKind == OMPC_SCHEDULE_guided) &&
+ !DynamicWithOrderedClause);
EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(/*SeparateIter=*/false),
S.getInc(),
diff --git a/clang/test/OpenMP/for_codegen.cpp b/clang/test/OpenMP/for_codegen.cpp
index 31161e26e1e..6939108933c 100644
--- a/clang/test/OpenMP/for_codegen.cpp
+++ b/clang/test/OpenMP/for_codegen.cpp
@@ -41,6 +41,7 @@ void without_schedule_clause(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
@@ -81,6 +82,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
@@ -129,6 +131,7 @@ void static_chunked(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i32 [[IV1_2]], 1
@@ -180,7 +183,7 @@ void dynamic1(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
@@ -221,7 +224,7 @@ void guided7(float *a, float *b, float *c, float *d) {
// CHECK-NEXT: store i64 [[CALC_I_2]], i64* [[LC_I:.+]]
// ... loop body ...
// End of body: store into a[i]:
-// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK: store float [[RESULT:%.+]], float* {{%.+}}!llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add i64 [[IV1_2]], 1
@@ -266,6 +269,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i64, i64* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i64 [[IV1_2]], 1
@@ -307,6 +311,7 @@ void runtime(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
a[i] = b[i] * c[i] * d[i];
// CHECK: [[IV1_2:%.+]] = load i32, i32* [[OMP_IV]]{{.*}}
// CHECK-NEXT: [[ADD1_2:%.+]] = add nsw i32 [[IV1_2]], 1
diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp
index 28ff3baacca..169328b9062 100644
--- a/clang/test/OpenMP/ordered_codegen.cpp
+++ b/clang/test/OpenMP/ordered_codegen.cpp
@@ -39,6 +39,7 @@ void static_not_chunked(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
#pragma omp ordered
@@ -86,6 +87,7 @@ void dynamic1(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
#pragma omp ordered
@@ -139,6 +141,7 @@ void test_auto(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
#pragma omp ordered
@@ -189,6 +192,7 @@ void runtime(float *a, float *b, float *c, float *d) {
// ... loop body ...
// End of body: store into a[i]:
// CHECK: store float [[RESULT:%.+]], float* {{%.+}}
+// CHECK-NOT: !llvm.mem.parallel_loop_access
// CHECK-NEXT: call void @__kmpc_end_ordered([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
// ... end of ordered region ...
#pragma omp ordered
OpenPOWER on IntegriCloud