diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2017-07-12 18:09:32 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2017-07-12 18:09:32 +0000 |
commit | 3344603f7b5e461e81c58b4cb332ee5ecdf6323b (patch) | |
tree | a79a9466a9f040e653b0ecd1b0e30a2d11ae2276 | |
parent | 4450e73b5e8e214d848875eb8286f5b6448e1605 (diff) | |
download | bcm5719-llvm-3344603f7b5e461e81c58b4cb332ee5ecdf6323b.tar.gz bcm5719-llvm-3344603f7b5e461e81c58b4cb332ee5ecdf6323b.zip |
[OPENMP] Emit implicit taskgroup block around taskloop directives.
If a taskloop directive has no associated nogroup clause, it must be emitted
inside an implicit taskgroup block. The runtime supports this, but we need to
generate the implicit taskgroup block explicitly to support future
reductions codegen.
llvm-svn: 307822
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 19 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 13 | ||||
-rw-r--r-- | clang/test/OpenMP/taskloop_codegen.cpp | 6 | ||||
-rw-r--r-- | clang/test/OpenMP/taskloop_simd_codegen.cpp | 6 |
4 files changed, 34 insertions, 10 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 3df95a4e9b2..a2ea0dec3e9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4157,9 +4157,15 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, /*IsInitializer=*/true); enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; llvm::Value *TaskArgs[] = { - UpLoc, ThreadID, Result.NewTask, IfVal, LBLVal.getPointer(), - UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()), - llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0), + UpLoc, + ThreadID, + Result.NewTask, + IfVal, + LBLVal.getPointer(), + UBLVal.getPointer(), + CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getNullValue( + CGF.IntTy), // Always 0 because taskgroup emitted by the compiler llvm::ConstantInt::getSigned( CGF.IntTy, Data.Schedule.getPointer() ? Data.Schedule.getInt() ? NumTasks : Grainsize @@ -4168,10 +4174,9 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), - Result.TaskDupFn - ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Result.TaskDupFn, - CGF.VoidPtrTy) - : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + Result.TaskDupFn ? 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Result.TaskDupFn, CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 493cd627e41..71797e2e6fb 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -4363,7 +4363,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); }; - EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + if (Data.Nogroup) + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + else { + CGM.getOpenMPRuntime().emitTaskgroupRegion( + *this, + [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, + PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Data); + }, + S.getLocStart()); + } } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp index bc7367ce021..b1b45455802 100644 --- a/clang/test/OpenMP/taskloop_codegen.cpp +++ b/clang/test/OpenMP/taskloop_codegen.cpp @@ -8,6 +8,7 @@ // CHECK-LABEL: @main int main(int argc, char **argv) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -19,6 +20,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: 
[[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) #pragma omp taskloop priority(argc) for (int i = 0; i < 10; ++i) ; @@ -33,10 +35,11 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64 -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null) +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 1, i64 [[GRAINSIZE]], i8* null) #pragma omp taskloop nogroup grainsize(argc) for (int i = 0; i < 10; ++i) ; +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -50,6 +53,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) int i; #pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4) for (i = 0; i < argc; ++i) diff --git a/clang/test/OpenMP/taskloop_simd_codegen.cpp 
b/clang/test/OpenMP/taskloop_simd_codegen.cpp index 60ba5f22129..48c6f479e9f 100644 --- a/clang/test/OpenMP/taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_codegen.cpp @@ -8,6 +8,7 @@ // CHECK-LABEL: @main int main(int argc, char **argv) { // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 33, i64 72, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -19,6 +20,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) #pragma omp taskloop simd priority(argc) for (int i = 0; i < 10; ++i) ; @@ -33,10 +35,11 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64 -// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null) +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 1, i64 [[GRAINSIZE]], i8* null) #pragma omp taskloop simd nogroup grainsize(argc) simdlen(4) for (int i = 0; i < 10; ++i) ; +// CHECK: call void @__kmpc_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) // CHECK: [[TASKV:%.+]] = 
call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 72, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) // CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* // CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 @@ -50,6 +53,7 @@ int main(int argc, char **argv) { // CHECK: store i64 1, i64* [[ST]], // CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], // CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null) +// CHECK: call void @__kmpc_end_taskgroup(%ident_t* [[DEFLOC]], i32 [[GTID]]) int i; #pragma omp taskloop simd if(argc) shared(argc, argv) collapse(2) num_tasks(4) safelen(32) for (i = 0; i < argc; ++i) |