summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
authorArpith Chacko Jacob <acjacob@us.ibm.com>2017-01-25 01:38:33 +0000
committerArpith Chacko Jacob <acjacob@us.ibm.com>2017-01-25 01:38:33 +0000
commit4dbf368e14e4e96311abd084f29b07d398f680fc (patch)
treea10c316d939ffb931128dc8b4cef78457b99d213 /clang/lib
parent2f3f9855f0fd5a5c5af28bf705b35fc30d6e9738 (diff)
downloadbcm5719-llvm-4dbf368e14e4e96311abd084f29b07d398f680fc.tar.gz
bcm5719-llvm-4dbf368e14e4e96311abd084f29b07d398f680fc.zip
[OpenMP] Codegen support for 'target teams' on the host.
This patch adds support for codegen of 'target teams' on the host. This combined directive has two captured statements, one for the 'teams' region, and the other for the 'parallel'. This target teams region is offloaded using the __tgt_target_teams() call. The patch sets the number of teams as an argument to this call. Reviewers: ABataev Differential Revision: https://reviews.llvm.org/D29084 llvm-svn: 293001
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/Basic/OpenMPKinds.cpp5
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp75
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp1
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp65
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h3
-rw-r--r--clang/lib/Sema/SemaOpenMP.cpp11
6 files changed, 119 insertions, 41 deletions
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp
index 12686ff083a..12a4aea31fe 100644
--- a/clang/lib/Basic/OpenMPKinds.cpp
+++ b/clang/lib/Basic/OpenMPKinds.cpp
@@ -875,8 +875,11 @@ void clang::getOpenMPCaptureRegions(
case OMPD_parallel_sections:
CaptureRegions.push_back(OMPD_parallel);
break;
- case OMPD_teams:
case OMPD_target_teams:
+ CaptureRegions.push_back(OMPD_target);
+ CaptureRegions.push_back(OMPD_teams);
+ break;
+ case OMPD_teams:
case OMPD_simd:
case OMPD_for:
case OMPD_for_simd:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index d9c68f9cce5..25b2c167df9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4911,18 +4911,28 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
"teams directive expected to be "
"emitted only for the host!");
+ auto &Bld = CGF.Builder;
+
+ // If the target directive is combined with a teams directive:
+ // Return the value in the num_teams clause, if any.
+ // Otherwise, return 0 to denote the runtime default.
+ if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
+ if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
+ CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
+ auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
+ /*IgnoreResultAssign*/ true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
+
+ // The default value is 0.
+ return Bld.getInt32(0);
+ }
+
// If the target directive is combined with a parallel directive but not a
// teams directive, start one team.
- if (isOpenMPParallelDirective(D.getDirectiveKind()) &&
- !isOpenMPTeamsDirective(D.getDirectiveKind()))
- return CGF.Builder.getInt32(1);
-
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any num_teams clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr &&
- "Not expecting clause in directive.");
+ if (isOpenMPParallelDirective(D.getDirectiveKind()))
+ return Bld.getInt32(1);
// If the current target region has a teams region enclosed, we need to get
// the number of teams to pass to the runtime function call. This is done
@@ -4940,13 +4950,13 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
CGOpenMPInnerExprInfo CGInfo(CGF, CS);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
- return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty,
- /*IsSigned=*/true);
+ return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
+ /*IsSigned=*/true);
}
// If we have an enclosed teams directive but no num_teams clause we use
// the default value 0.
- return CGF.Builder.getInt32(0);
+ return Bld.getInt32(0);
}
// No teams associated with the directive.
@@ -4986,9 +4996,20 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
//
// If this is not a teams directive return nullptr.
- if (isOpenMPParallelDirective(D.getDirectiveKind())) {
+ if (isOpenMPTeamsDirective(D.getDirectiveKind()) ||
+ isOpenMPParallelDirective(D.getDirectiveKind())) {
llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
llvm::Value *NumThreadsVal = nullptr;
+ llvm::Value *ThreadLimitVal = nullptr;
+
+ if (const auto *ThreadLimitClause =
+ D.getSingleClause<OMPThreadLimitClause>()) {
+ CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
+ auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
+ /*IgnoreResultAssign*/ true);
+ ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
+ /*IsSigned=*/true);
+ }
if (const auto *NumThreadsClause =
D.getSingleClause<OMPNumThreadsClause>()) {
@@ -5000,15 +5021,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
}
- return NumThreadsVal ? NumThreadsVal : DefaultThreadLimitVal;
- }
+ // Select the lesser of thread_limit and num_threads.
+ if (NumThreadsVal)
+ ThreadLimitVal = ThreadLimitVal
+ ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
+ ThreadLimitVal),
+ NumThreadsVal, ThreadLimitVal)
+ : NumThreadsVal;
- // FIXME: For the moment we do not support combined directives with target and
- // teams, so we do not expect to get any thread_limit clause in the provided
- // directive. Once we support that, this assertion can be replaced by the
- // actual emission of the clause expression.
- assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr &&
- "Not expecting clause in directive.");
+ // Set default value passed to the runtime if either teams or a target
+ // parallel type directive is found but no clause is specified.
+ if (!ThreadLimitVal)
+ ThreadLimitVal = DefaultThreadLimitVal;
+
+ return ThreadLimitVal;
+ }
// If the current target region has a teams region enclosed, we need to get
// the thread limit to pass to the runtime function call. This is done
@@ -6217,6 +6244,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
break;
+ case Stmt::OMPTargetTeamsDirectiveClass:
+ CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
+ break;
default:
llvm_unreachable("Unknown target directive for OpenMP device codegen.");
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 6b5591b8b5c..a277b679f2a 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -198,6 +198,7 @@ getExecutionModeForDirective(CodeGenModule &CGM,
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
switch (DirectiveKind) {
case OMPD_target:
+ case OMPD_target_teams:
return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic;
case OMPD_target_parallel:
return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd;
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f72eb8e4f1d..54c33e1dd2c 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -98,6 +98,22 @@ public:
/*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
};
+/// Lexical scope for OpenMP teams construct, that handles correct codegen
+/// for captured expressions.
+class OMPTeamsScope final : public OMPLexicalScope {
+ bool EmitPreInitStmt(const OMPExecutableDirective &S) {
+ OpenMPDirectiveKind Kind = S.getDirectiveKind();
+ return !isOpenMPTargetExecutionDirective(Kind) &&
+ isOpenMPTeamsDirective(Kind);
+ }
+
+public:
+ OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
+ : OMPLexicalScope(CGF, S,
+ /*AsInlined=*/false,
+ /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {}
+};
+
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
@@ -2018,15 +2034,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
});
}
-void CodeGenFunction::EmitOMPTargetTeamsDirective(
- const OMPTargetTeamsDirective &S) {
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
-}
-
void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
const OMPTargetTeamsDistributeDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(
@@ -3519,9 +3526,8 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
- const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S);
- const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
- const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
+ const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>();
+ const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
@@ -3530,7 +3536,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
S.getLocStart());
}
- OMPLexicalScope Scope(CGF, S);
+ OMPTeamsScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn,
@@ -3549,6 +3555,39 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen);
}
+static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
+ const OMPTargetTeamsDirective &S) {
+ auto *CS = S.getCapturedStmt(OMPD_teams);
+ Action.Enter(CGF);
+ auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
+ // TODO: Add support for clauses.
+ CGF.EmitStmt(CS->getCapturedStmt());
+ };
+ emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
+ CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ llvm::Function *Fn;
+ llvm::Constant *Addr;
+ // Emit target region as a standalone region.
+ CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
+ S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
+ assert(Fn && Addr && "Target device function emission failed.");
+}
+
+void CodeGenFunction::EmitOMPTargetTeamsDirective(
+ const OMPTargetTeamsDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ emitTargetTeamsRegion(CGF, Action, S);
+ };
+ emitCommonOMPTargetDirective(*this, S, CodeGen);
+}
+
void CodeGenFunction::EmitOMPCancellationPointDirective(
const OMPCancellationPointDirective &S) {
CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(),
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 7db72dda5c1..82ab5c3781e 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2711,6 +2711,9 @@ public:
static void
EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
const OMPTargetParallelDirective &S);
+ static void
+ EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName,
+ const OMPTargetTeamsDirective &S);
/// \brief Emit inner loop of the worksharing/simd construct.
///
/// \param S Directive, for which the inner loop must be emitted.
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 9ef39b6889b..040bddc6c70 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1594,8 +1594,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
case OMPD_parallel_for:
case OMPD_parallel_for_simd:
case OMPD_parallel_sections:
- case OMPD_teams:
- case OMPD_target_teams: {
+ case OMPD_teams: {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
@@ -1608,6 +1607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
Params);
break;
}
+ case OMPD_target_teams:
case OMPD_target_parallel: {
Sema::CapturedParamNameType ParamsTarget[] = {
std::make_pair(StringRef(), QualType()) // __context with shared vars
@@ -1618,14 +1618,15 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) {
QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1);
QualType KmpInt32PtrTy =
Context.getPointerType(KmpInt32Ty).withConst().withRestrict();
- Sema::CapturedParamNameType ParamsParallel[] = {
+ Sema::CapturedParamNameType ParamsTeamsOrParallel[] = {
std::make_pair(".global_tid.", KmpInt32PtrTy),
std::make_pair(".bound_tid.", KmpInt32PtrTy),
std::make_pair(StringRef(), QualType()) // __context with shared vars
};
- // Start a captured region for 'parallel'.
+ // Start a captured region for 'teams' or 'parallel'. Both regions have
+ // the same implicit parameters.
ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP,
- ParamsParallel);
+ ParamsTeamsOrParallel);
break;
}
case OMPD_simd:
OpenPOWER on IntegriCloud