diff options
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/Basic/OpenMPKinds.cpp | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 75 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 1 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 65 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 3 | ||||
-rw-r--r-- | clang/lib/Sema/SemaOpenMP.cpp | 11 |
6 files changed, 119 insertions, 41 deletions
diff --git a/clang/lib/Basic/OpenMPKinds.cpp b/clang/lib/Basic/OpenMPKinds.cpp index 12686ff083a..12a4aea31fe 100644 --- a/clang/lib/Basic/OpenMPKinds.cpp +++ b/clang/lib/Basic/OpenMPKinds.cpp @@ -875,8 +875,11 @@ void clang::getOpenMPCaptureRegions( case OMPD_parallel_sections: CaptureRegions.push_back(OMPD_parallel); break; - case OMPD_teams: case OMPD_target_teams: + CaptureRegions.push_back(OMPD_target); + CaptureRegions.push_back(OMPD_teams); + break; + case OMPD_teams: case OMPD_simd: case OMPD_for: case OMPD_for_simd: diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index d9c68f9cce5..25b2c167df9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4911,18 +4911,28 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, "teams directive expected to be " "emitted only for the host!"); + auto &Bld = CGF.Builder; + + // If the target directive is combined with a teams directive: + // Return the value in the num_teams clause, if any. + // Otherwise, return 0 to denote the runtime default. + if (isOpenMPTeamsDirective(D.getDirectiveKind())) { + if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) { + CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); + auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // The default value is 0. + return Bld.getInt32(0); + } + // If the target directive is combined with a parallel directive but not a // teams directive, start one team. - if (isOpenMPParallelDirective(D.getDirectiveKind()) && - !isOpenMPTeamsDirective(D.getDirectiveKind())) - return CGF.Builder.getInt32(1); - - // FIXME: For the moment we do not support combined directives with target and - // teams, so we do not expect to get any num_teams clause in the provided - // directive. Once we support that, this assertion can be replaced by the - // actual emission of the clause expression. - assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && - "Not expecting clause in directive."); + if (isOpenMPParallelDirective(D.getDirectiveKind())) + return Bld.getInt32(1); // If the current target region has a teams region enclosed, we need to get // the number of teams to pass to the runtime function call. This is done @@ -4940,13 +4950,13 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CGOpenMPInnerExprInfo CGInfo(CGF, CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); } // If we have an enclosed teams directive but no num_teams clause we use // the default value 0. - return CGF.Builder.getInt32(0); + return Bld.getInt32(0); } // No teams associated with the directive. @@ -4986,9 +4996,20 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, // // If this is not a teams directive return nullptr. - if (isOpenMPParallelDirective(D.getDirectiveKind())) { + if (isOpenMPTeamsDirective(D.getDirectiveKind()) || + isOpenMPParallelDirective(D.getDirectiveKind())) { llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); llvm::Value *NumThreadsVal = nullptr; + llvm::Value *ThreadLimitVal = nullptr; + + if (const auto *ThreadLimitClause = + D.getSingleClause<OMPThreadLimitClause>()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), + /*IgnoreResultAssign*/ true); + ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } if (const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>()) { @@ -5000,15 +5021,21 @@ emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); } - return NumThreadsVal ? NumThreadsVal : DefaultThreadLimitVal; - } + // Select the lesser of thread_limit and num_threads. + if (NumThreadsVal) + ThreadLimitVal = ThreadLimitVal + ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, + ThreadLimitVal), + NumThreadsVal, ThreadLimitVal) + : NumThreadsVal; - // FIXME: For the moment we do not support combined directives with target and - // teams, so we do not expect to get any thread_limit clause in the provided - // directive. Once we support that, this assertion can be replaced by the - // actual emission of the clause expression. - assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && - "Not expecting clause in directive."); + // Set default value passed to the runtime if either teams or a target + // parallel type directive is found but no clause is specified. + if (!ThreadLimitVal) + ThreadLimitVal = DefaultThreadLimitVal; + + return ThreadLimitVal; + } // If the current target region has a teams region enclosed, we need to get // the thread limit to pass to the runtime function call. This is done @@ -6217,6 +6244,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, CodeGenFunction::EmitOMPTargetParallelDeviceFunction( CGM, ParentName, cast<OMPTargetParallelDirective>(*S)); break; + case Stmt::OMPTargetTeamsDirectiveClass: + CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsDirective>(*S)); + break; default: llvm_unreachable("Unknown target directive for OpenMP device codegen."); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 6b5591b8b5c..a277b679f2a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -198,6 +198,7 @@ getExecutionModeForDirective(CodeGenModule &CGM, OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); switch (DirectiveKind) { case OMPD_target: + case OMPD_target_teams: return CGOpenMPRuntimeNVPTX::ExecutionMode::Generic; case OMPD_target_parallel: return CGOpenMPRuntimeNVPTX::ExecutionMode::Spmd; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f72eb8e4f1d..54c33e1dd2c 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -98,6 +98,22 @@ public: /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} }; +/// Lexical scope for OpenMP teams construct, that handles correct codegen +/// for captured expressions. +class OMPTeamsScope final : public OMPLexicalScope { + bool EmitPreInitStmt(const OMPExecutableDirective &S) { + OpenMPDirectiveKind Kind = S.getDirectiveKind(); + return !isOpenMPTargetExecutionDirective(Kind) && + isOpenMPTeamsDirective(Kind); + } + +public: + OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) + : OMPLexicalScope(CGF, S, + /*AsInlined=*/false, + /*EmitPreInitStmt=*/EmitPreInitStmt(S)) {} +}; + /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { @@ -2018,15 +2034,6 @@ void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( }); } -void CodeGenFunction::EmitOMPTargetTeamsDirective( - const OMPTargetTeamsDirective &S) { - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_teams, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); -} - void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( const OMPTargetTeamsDistributeDirective &S) { CGM.getOpenMPRuntime().emitInlinedDirective( @@ -3519,9 +3526,8 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); - const OMPTeamsDirective &TD = *dyn_cast<OMPTeamsDirective>(&S); - const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); - const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); + const OMPNumTeamsClause *NT = S.getSingleClause<OMPNumTeamsClause>(); + const OMPThreadLimitClause *TL = S.getSingleClause<OMPThreadLimitClause>(); if (NT || TL) { Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr; @@ -3530,7 +3536,7 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, S.getLocStart()); } - OMPLexicalScope Scope(CGF, S); + OMPTeamsScope Scope(CGF, S); llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, @@ -3549,6 +3555,39 @@ void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { emitCommonOMPTeamsDirective(*this, S, OMPD_teams, CodeGen); } +static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, + const OMPTargetTeamsDirective &S) { + auto *CS = S.getCapturedStmt(OMPD_teams); + Action.Enter(CGF); + auto &&CodeGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + // TODO: Add support for clauses. + CGF.EmitStmt(CS->getCapturedStmt()); + }; + emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); +} + +void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( + CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + llvm::Function *Fn; + llvm::Constant *Addr; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); + assert(Fn && Addr && "Target device function emission failed."); +} + +void CodeGenFunction::EmitOMPTargetTeamsDirective( + const OMPTargetTeamsDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + emitTargetTeamsRegion(CGF, Action, S); + }; + emitCommonOMPTargetDirective(*this, S, CodeGen); +} + void CodeGenFunction::EmitOMPCancellationPointDirective( const OMPCancellationPointDirective &S) { CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getLocStart(), diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 7db72dda5c1..82ab5c3781e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2711,6 +2711,9 @@ public: static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S); + static void + EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, + const OMPTargetTeamsDirective &S); /// \brief Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 9ef39b6889b..040bddc6c70 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -1594,8 +1594,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { case OMPD_parallel_for: case OMPD_parallel_for_simd: case OMPD_parallel_sections: - case OMPD_teams: - case OMPD_target_teams: { + case OMPD_teams: { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); @@ -1608,6 +1607,7 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { Params); break; } + case OMPD_target_teams: case OMPD_target_parallel: { Sema::CapturedParamNameType ParamsTarget[] = { std::make_pair(StringRef(), QualType()) // __context with shared vars @@ -1618,14 +1618,15 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope *CurScope) { QualType KmpInt32Ty = Context.getIntTypeForBitwidth(32, 1); QualType KmpInt32PtrTy = Context.getPointerType(KmpInt32Ty).withConst().withRestrict(); - Sema::CapturedParamNameType ParamsParallel[] = { + Sema::CapturedParamNameType ParamsTeamsOrParallel[] = { std::make_pair(".global_tid.", KmpInt32PtrTy), std::make_pair(".bound_tid.", KmpInt32PtrTy), std::make_pair(StringRef(), QualType()) // __context with shared vars }; - // Start a captured region for 'parallel'. + // Start a captured region for 'teams' or 'parallel'. Both regions have + // the same implicit parameters. ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, - ParamsParallel); + ParamsTeamsOrParallel); break; } case OMPD_simd: |