summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2016-04-28 09:15:06 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2016-04-28 09:15:06 +0000
commit2b19a6fe539d221472c92715dc90c56cb17854bd (patch)
tree62a806881bcde221529ade2de82fd31d70c8a962
parent35e9ea38127b99a261f946a415d3641538b8baa3 (diff)
downloadbcm5719-llvm-2b19a6fe539d221472c92715dc90c56cb17854bd.tar.gz
bcm5719-llvm-2b19a6fe539d221472c92715dc90c56cb17854bd.zip
[OPENMP 4.5] Codegen for 'grainsize/num_tasks' clauses of 'taskloop' directive.
OpenMP 4.5 defines the 'taskloop' directive and 2 additional clauses, 'grainsize' and 'num_tasks', for this directive. This patch adds codegen for these clauses. The clauses are generated as arguments of the '__kmpc_taskloop' libcall and are encoded in the following way: void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int sched, kmp_uint64 grainsize, void *task_dup); If 'grainsize' is specified, the 'sched' argument must be set to '1' and the 'grainsize' argument must be set to the value of the 'grainsize' clause. If 'num_tasks' is specified, the 'sched' argument must be set to '2' and the 'grainsize' argument must be set to the value of the 'num_tasks' clause. Sharing one argument is possible because these 2 clauses are mutually exclusive and can't be used at the same time on the same directive. If neither of these clauses is specified, the 'sched' argument must be set to '0'. llvm-svn: 267862
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp23
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h10
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp16
-rw-r--r--clang/test/OpenMP/taskloop_codegen.cpp14
4 files changed, 42 insertions, 21 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 19a58f06feb..ec939dd9db3 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -3777,7 +3777,8 @@ void CGOpenMPRuntime::emitTaskCall(
void CGOpenMPRuntime::emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
- bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, bool Nogroup,
+ bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule, bool Nogroup,
unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
@@ -3825,17 +3826,19 @@ void CGOpenMPRuntime::emitTaskLoopCall(
cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
/*IsInitializer=*/true);
+ enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
- UpLoc,
- ThreadID,
- Data.NewTask,
- IfVal,
- LBLVal.getPointer(),
- UBLVal.getPointer(),
- CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
+ UpLoc, ThreadID, Data.NewTask, IfVal, LBLVal.getPointer(),
+ UBLVal.getPointer(), CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 1 : 0),
- llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0),
- llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
+ llvm::ConstantInt::getSigned(
+ CGF.IntTy, Schedule.getPointer()
+ ? Schedule.getInt() ? NumTasks : Grainsize
+ : NoSchedule),
+ Schedule.getPointer()
+ ? CGF.Builder.CreateIntCast(Schedule.getPointer(), CGF.Int64Ty,
+ /*isSigned=*/false)
+ : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 68fe1ccc981..19eb3622f0a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -858,6 +858,9 @@ public:
/// \param Final Contains either constant bool value, or llvm::Value * of i1
/// type for final clause. If the value is true, the task forces all of its
/// child tasks to become final and included tasks.
+ /// \param Schedule If Pointer is nullptr, no grainsize/num_tasks clauses were
+ /// specified. If IntVal is false - it is for grainsize clause, true - for
+ /// num_tasks clause.
/// \param Nogroup true if nogroup clause was specified, false otherwise.
/// \param NumberOfParts Number of parts in untied taskloops.
/// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32
@@ -881,9 +884,10 @@ public:
virtual void emitTaskLoopCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction,
- QualType SharedsTy, Address Shareds, const Expr *IfCond,
- ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies,
+ llvm::PointerIntPair<llvm::Value *, 1, bool> Schedule, bool Nogroup,
+ unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+ ArrayRef<const Expr *> PrivateCopies,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> FirstprivateCopies,
ArrayRef<const Expr *> FirstprivateInits);
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 9a9fdfcb7f4..ae783f5dde7 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3362,6 +3362,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// By default the task is not final.
Final.setInt(/*IntVal=*/false);
}
+ llvm::PointerIntPair<llvm::Value * /*no grainsize/num_tasks=nullptr*/, 1,
+ bool /*Grainsize=false, NumTasks=true*/>
+ Schedule;
+ if (const auto* Clause = S.getSingleClause<OMPGrainsizeClause>()) {
+ // grainsize clause
+ Schedule.setInt(/*IntVal=*/false);
+ Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
+ } else if (const auto* Clause = S.getSingleClause<OMPNumTasksClause>()) {
+ // num_tasks clause
+ Schedule.setInt(/*IntVal=*/true);
+ Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
+ }
auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
// if (PreCond) {
@@ -3433,13 +3445,13 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.EmitBlock(ContBlock, true);
}
};
- auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final,
+ auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, &Schedule,
Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn,
const OMPPrivateDataTy &Data) {
auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(
- CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup,
+ CGF, S.getLocStart(), S, Data.Tied, Final, Schedule, Nogroup,
Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars,
Data.FirstprivateCopies, Data.FirstprivateInits);
diff --git a/clang/test/OpenMP/taskloop_codegen.cpp b/clang/test/OpenMP/taskloop_codegen.cpp
index f3ae1b12208..02a5bf1af7a 100644
--- a/clang/test/OpenMP/taskloop_codegen.cpp
+++ b/clang/test/OpenMP/taskloop_codegen.cpp
@@ -37,8 +37,9 @@ int main(int argc, char **argv) {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null)
-#pragma omp taskloop nogroup
+// CHECK: [[GRAINSIZE:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 1, i64 [[GRAINSIZE]], i8* null)
+#pragma omp taskloop nogroup grainsize(argc)
for (int i = 0; i < 10; ++i)
;
// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*))
@@ -55,9 +56,9 @@ int main(int argc, char **argv) {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 4, i8* null)
int i;
-#pragma omp taskloop if(argc) shared(argc, argv) collapse(2)
+#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(4)
for (i = 0; i < argc; ++i)
for (int j = argc; j < argv[argc][argc]; ++j)
;
@@ -158,8 +159,9 @@ struct S {
// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6
// CHECK: store i64 1, i64* [[ST]],
// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]],
-// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null)
-#pragma omp taskloop shared(c)
+// CHECK: [[NUM_TASKS:%.+]] = zext i32 %{{.+}} to i64
+// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 2, i64 [[NUM_TASKS]], i8* null)
+#pragma omp taskloop shared(c) num_tasks(a)
for (a = 0; a < c; ++a)
;
}
OpenPOWER on IntegriCloud