summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp23
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h10
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp16
-rw-r--r--clang/test/OpenMP/taskloop_codegen.cpp14
4 files changed, 42 insertions, 21 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 19a58f06feb..ec939dd9db3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3777,7 +3777,8 @@ void CGOpenMPRuntime::emitTaskCall(
void CGOpenMPRuntime::emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
- bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, bool Nogroup,
+ bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule, bool Nogroup,
unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
@@ -3825,17 +3826,19 @@ void CGOpenMPRuntime::emitTaskLoopCall(
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
+ enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
- UpLoc,
- ThreadID,
- Data.NewTask,
- IfVal,
- LBLVal.getPointer(),
- UBLVal.getPointer(),
- CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ UpLoc, ThreadID, Data.NewTask, IfVal, LBLVal.getPointer(),
+ UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0),
- llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0),
- llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Schedule.getPointer()
+ ? Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 68fe1ccc981..19eb3622f0a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -858,6 +858,9 @@ public:
/// \param Final Contains either constant bool value, or llvm::Value * of i1
/// type for final clause. If the value is true, the task forces all of its
/// child tasks to become final and included tasks.
+ /// \param Schedule If Pointer is nullptr, no grainsize/num_tasks clauses were
+ /// specified. If IntVal is false - it is for grainsize clause, true - for
+ /// num_tasks clause.
/// \param Nogroup true if nogroup clause was specified, false otherwise.
/// \param NumberOfParts Number of parts in untied taskloops.
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
@@ -881,9 +884,10 @@ public:
virtual void emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction,
- QualType SharedsTy, Address Shareds, const Expr *IfCond,
- ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule, bool Nogroup,
+ unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+ ArrayRef<const Expr *> PrivateCopies,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> FirstprivateCopies,
ArrayRef<const Expr *> FirstprivateInits);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 9a9fdfcb7f4..ae783f5dde7 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3362,6 +3362,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// By default the task is not final.
Final.setInt(/*IntVal=*/false);
}
+ llvm::PointerIntPair<llvm::Value * /*no grainsize/num_tasks=nullptr*/, 1,
+ bool /*Grainsize=false, NumTasks=true*/>
+ Schedule;
+ if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
+ // grainsize clause
+ Schedule.setInt(/*IntVal=*/false);
+ Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+ } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
+ // num_tasks clause
+ Schedule.setInt(/*IntVal=*/true);
+ Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+ }
auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
// if (PreCond) {
@@ -3433,13 +3445,13 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.EmitBlock(ContBlock, true);
}
};
- auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final,
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, &Schedule,
Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
const OMPPrivateDataTy &Data) {
auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(
- CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup,
+ CGF, S.getLocStart(), S, Data.Tied, Final, Schedule, Nogroup,
Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars,
Data.FirstprivateCopies, Data.FirstprivateInits);
diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp
index f3ae1b12208..02a5bf1af7a 100644
--- a/clang/test/OpenMP/taskloop_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_codegen.cpp
@@ -37,8 +37,9 @@ int main(int argc, char **argv) {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
-#pragma omp taskloop nogroup
+// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
+#pragma omp taskloop nogroup grainsize(argc)
for (int i = 0; i < 10; ++i)
;
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
@@ -55,9 +56,9 @@ int main(int argc, char **argv) {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
int i;
-#pragma omp taskloop if(argc) shared(argc, argv) collapse(2)
+#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4)
for (i = 0; i < argc; ++i)
for (int j = argc; j < argv[argc][argc]; ++j)
;
@@ -158,8 +159,9 @@ struct S {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
-#pragma omp taskloop shared(c)
+// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
+#pragma omp taskloop shared(c) num_tasks(a)
for (a = 0; a < c; ++a)
;
}
OpenPOWER on IntegriCloud