summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2017-07-12 18:09:32 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2017-07-12 18:09:32 +0000
commit3344603f7b5e461e81c58b4cb332ee5ecdf6323b (patch)
treea79a9466a9f040e653b0ecd1b0e30a2d11ae2276
parent4450e73b5e8e214d848875eb8286f5b6448e1605 (diff)
downloadbcm5719-llvm-3344603f7b5e461e81c58b4cb332ee5ecdf6323b.tar.gz
bcm5719-llvm-3344603f7b5e461e81c58b4cb332ee5ecdf6323b.zip
[OPENMP] Emit implicit taskgroup block around taskloop directives.
If taskloop directive has no associated nogroup clause, it must emitted inside implicit taskgroup block. Runtime supports it, but we need to generate implicit taskgroup block explicitly to support future reductions codegen. llvm-svn: 307822
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp19
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp13
-rw-r--r--clang/test/OpenMP/taskloop_codegen.cpp6
-rw-r--r--clang/test/OpenMP/taskloop_simd_codegen.cpp6
4 files changed, 34 insertions, 10 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 3df95a4e9b2..a2ea0dec3e9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4157,9 +4157,15 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
/*IsInitializer=*/true);
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
- UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(),
- UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
- llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0),
+ UpLoc,
+ ThreadID,
+ Result.NewTask,
+ IfVal,
+ LBLVal.getPointer(),
+ UBLVal.getPointer(),
+ CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ llvm::ConstantInt::getNullValue(
+ CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
llvm::ConstantInt::getSigned(
CGF.IntTy, Data.Schedule.getPointer()
? Data.Schedule.getInt() ? NumTasks : Grainsize
@@ -4168,10 +4174,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
- Result.TaskDupFn
- ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn,
- CGF.VoidPtrTy)
- : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
+ Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Result.TaskDupFn, CGF.VoidPtrTy)
+ : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 493cd627e41..71797e2e6fb 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -4363,7 +4363,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
CodeGen);
};
- EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ if (Data.Nogroup)
+ EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ else {
+ CGM.getOpenMPRuntime().emitTaskgroupRegion(
+ *this,
+ [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
+ PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data);
+ },
+ S.getLocStart());
+ }
}
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp
index bc7367ce021..b1b45455802 100644
--- a/clang/test/OpenMP/taskloop_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_codegen.cpp
@@ -8,6 +8,7 @@
// CHECK-LABEL: @main
int main(int argc, char **argv) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
@@ -19,6 +20,7 @@ int main(int argc, char **argv) {
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
#pragma omp taskloop priority(argc)
for (int i = 0; i < 10; ++i)
;
@@ -33,10 +35,11 @@ int main(int argc, char **argv) {
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 1, i64 [[GRAINSIZE]], i8* null)
#pragma omp taskloop nogroup grainsize(argc)
for (int i = 0; i < 10; ++i)
;
+// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
@@ -50,6 +53,7 @@ int main(int argc, char **argv) {
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
+// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
int i;
#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4)
for (i = 0; i < argc; ++i)
diff --git a/clang/test/OpenMP/taskloop_simd_codegen.cpp b/clang/test/OpenMP/taskloop_simd_codegen.cpp
index 60ba5f22129..48c6f479e9f 100644
--- a/clang/test/OpenMP/taskloop_simd_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_simd_codegen.cpp
@@ -8,6 +8,7 @@
// CHECK-LABEL: @main
int main(int argc, char **argv) {
// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]])
+// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
@@ -19,6 +20,7 @@ int main(int argc, char **argv) {
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
#pragma omp taskloop simd priority(argc)
for (int i = 0; i < 10; ++i)
;
@@ -33,10 +35,11 @@ int main(int argc, char **argv) {
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 1, i64 [[GRAINSIZE]], i8* null)
#pragma omp taskloop simd nogroup grainsize(argc) simdlen(4)
for (int i = 0; i < 10; ++i)
;
+// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]*
// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0
@@ -50,6 +53,7 @@ int main(int argc, char **argv) {
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
+// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]])
int i;
#pragma omp taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(4) safelen(32)
for (i = 0; i < argc; ++i)
OpenPOWER on IntegriCloud