summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGStmtOpenMP.cpp
diff options
context:
space:
mode:
authorCarlo Bertolli <cbertol@us.ibm.com>2016-04-04 15:55:02 +0000
committerCarlo Bertolli <cbertol@us.ibm.com>2016-04-04 15:55:02 +0000
commitc687225b436cd0ff0a6061a6f79f13c35e6b6228 (patch)
tree71f1428bdf6f4138fd2c53ebe76960b814fc4ec2 /clang/lib/CodeGen/CGStmtOpenMP.cpp
parent1eec3f01f07c88325f9958b85dee8d33f548af52 (diff)
downloadbcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.tar.gz
bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.zip
[OPENMP] Codegen for teams directive for NVPTX
This patch implements the teams directive for the NVPTX backend. It differs from the host code generation path in two ways: (1) It does not call kmpc_fork_teams. All necessary teams and threads are started upon entering the target region, when the CUDA kernel is launched, and their execution is coordinated through sequential and parallel regions within the target region. (2) It does not call kmpc_push_num_teams even if a num_teams or thread_limit clause is present. Setting the number of teams and the thread limit is implemented by the NVPTX-related runtime. Please note that I am now passing a Clang Expr * to emitNumTeamsClause instead of the originally chosen llvm::Value * type. The reason is that I want to avoid emitting the expressions for num_teams and thread_limit if they are not needed in the target region. http://reviews.llvm.org/D17963 llvm-svn: 265304
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp17
1 files changed, 5 insertions, 12 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 84059ab2912..bd208cb4df5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3098,18 +3098,11 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
- llvm::Value *NumTeamsVal = (NT) ? CGF.Builder.CreateIntCast(
- CGF.EmitScalarExpr(NT->getNumTeams()), CGF.CGM.Int32Ty,
- /* isSigned = */ true) :
- CGF.Builder.getInt32(0);
-
- llvm::Value *ThreadLimitVal = (TL) ? CGF.Builder.CreateIntCast(
- CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty,
- /* isSigned = */ true) :
- CGF.Builder.getInt32(0);
-
- CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal,
- ThreadLimitVal, S.getLocStart());
+ Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
+ Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
+
+ CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
+ S.getLocStart());
}
OMPLexicalScope Scope(CGF, S);
OpenPOWER on IntegriCloud