diff options
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 217 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 8 |
2 files changed, 217 insertions, 8 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index c4eaabe6ac9..0433115fc12 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -252,6 +252,70 @@ private: StringRef HelperName; }; +static void EmptyCodeGen(CodeGenFunction &) { + llvm_unreachable("No codegen for expressions"); +} +/// \brief API for generation of expressions captured in a innermost OpenMP +/// region. +class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo { +public: + CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) + : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, + OMPD_unknown, + /*HasCancel=*/false), + PrivScope(CGF) { + // Make sure the globals captured in the provided statement are local by + // using the privatization logic. We assume the same variable is not + // captured more than once. + for (auto &C : CS.captures()) { + if (!C.capturesVariable() && !C.capturesVariableByCopy()) + continue; + + const VarDecl *VD = C.getCapturedVar(); + if (VD->isLocalVarDeclOrParm()) + continue; + + DeclRefExpr DRE(const_cast<VarDecl *>(VD), + /*RefersToEnclosingVariableOrCapture=*/false, + VD->getType().getNonReferenceType(), VK_LValue, + SourceLocation()); + PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address { + return CGF.EmitLValue(&DRE).getAddress(); + }); + } + (void)PrivScope.Privatize(); + } + + /// \brief Lookup the captured field decl for a variable. + const FieldDecl *lookup(const VarDecl *VD) const override { + if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) + return FD; + return nullptr; + } + + /// \brief Emit the captured statement body. + void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { + llvm_unreachable("No body for expressions"); + } + + /// \brief Get a variable or parameter for storing global thread id + /// inside OpenMP construct. + const VarDecl *getThreadIDVariable() const override { + llvm_unreachable("No thread id for expressions"); + } + + /// \brief Get the name of the capture helper. + StringRef getHelperName() const override { + llvm_unreachable("No helper name for expressions"); + } + + static bool classof(const CGCapturedStmtInfo *Info) { return false; } + +private: + /// Private scope to capture global variables. + CodeGenFunction::OMPPrivateScope PrivScope; +}; + /// \brief RAII for emitting code of OpenMP constructs. class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; @@ -481,6 +545,10 @@ enum OpenMPRTLFunction { // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t // *arg_types); OMPRTL__tgt_target, + // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + OMPRTL__tgt_target_teams, // Call to void __tgt_register_lib(__tgt_bin_desc *desc); OMPRTL__tgt_register_lib, // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc); @@ -1153,6 +1221,24 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target"); break; } + case OMPRTL__tgt_target_teams: { + // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr, + // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, + // int32_t *arg_types, int32_t num_teams, int32_t thread_limit); + llvm::Type *TypeParams[] = {CGM.Int32Ty, + CGM.VoidPtrTy, + CGM.Int32Ty, + CGM.VoidPtrPtrTy, + CGM.VoidPtrPtrTy, + CGM.SizeTy->getPointerTo(), + CGM.Int32Ty->getPointerTo(), + CGM.Int32Ty, + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams"); + break; + } case OMPRTL__tgt_register_lib: { // Build void __tgt_register_lib(__tgt_bin_desc *desc); QualType ParamTy = @@ -3972,6 +4058,102 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID); } +/// \brief Emit the num_teams clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no num_teams clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any num_teams clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause<OMPNumTeamsClause>() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the number of teams to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) { + if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); + return CGF.Builder.CreateIntCast(NumTeams, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no num_teams clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + +/// \brief Emit the thread_limit clause of an enclosed teams directive at the +/// target region scope. If there is no teams directive associated with the +/// target directive, or if there is no thread_limit clause associated with the +/// enclosed teams directive, return nullptr. +static llvm::Value * +emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { + + assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " + "teams directive expected to be " + "emitted only for the host!"); + + // FIXME: For the moment we do not support combined directives with target and + // teams, so we do not expect to get any thread_limit clause in the provided + // directive. Once we support that, this assertion can be replaced by the + // actual emission of the clause expression. + assert(D.getSingleClause<OMPThreadLimitClause>() == nullptr && + "Not expecting clause in directive."); + + // If the current target region has a teams region enclosed, we need to get + // the thread limit to pass to the runtime function call. This is done + // by generating the expression in a inlined region. This is required because + // the expression is captured in the enclosing target environment when the + // teams directive is not combined with target. + + const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); + + // FIXME: Accommodate other combined directives with teams when they become + // available. + if (auto *TeamsDir = dyn_cast<OMPTeamsDirective>(CS.getCapturedStmt())) { + if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); + return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, + /*IsSigned=*/true); + } + + // If we have an enclosed teams directive but no thread_limit clause we use + // the default value 0. + return CGF.Builder.getInt32(0); + } + + // No teams associated with the directive. + return nullptr; +} + void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, @@ -4100,7 +4282,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Fill up the pointer arrays and transfer execution to the device. auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { + OffloadErrorQType, &D](CodeGenFunction &CGF) { unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4240,11 +4422,34 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); - llvm::Value *OffloadingArgs[] = { - DeviceID, OutlinedFnID, PointerNum, BasePointersArray, - PointersArray, SizesArray, MapTypesArray}; - auto Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), - OffloadingArgs); + // Return value of the runtime offloading call. + llvm::Value *Return; + + auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D); + auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D); + + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have ThreadLimit defined. These two + // values should be defined in the presence of a teams directive, regardless + // of having any clauses associated. If the user is using teams but no + // clauses, these two values will be the default that should be passed to + // the runtime library - a 32-bit integer with the value zero. + if (NumTeams) { + assert(ThreadLimit && "Thread limit expression should be available along " + "with number of teams."); + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, + BasePointersArray, PointersArray, SizesArray, + MapTypesArray, NumTeams, ThreadLimit}; + Return = CGF.EmitRuntimeCall( + createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + } else { + llvm::Value *OffloadingArgs[] = { + DeviceID, OutlinedFnID, PointerNum, BasePointersArray, + PointersArray, SizesArray, MapTypesArray}; + Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + OffloadingArgs); + } CGF.EmitStoreOfScalar(Return, OffloadError); }; diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index ddceb1b559c..7f4e19577ab 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2716,8 +2716,12 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { CapturedVars); } -void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &) { - llvm_unreachable("CodeGen for 'omp teams' is not supported yet."); +void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { + OMPLexicalScope Scope(*this, S); + const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); + + // FIXME: We should fork teams here instead of just emit the statement. + EmitStmt(CS.getCapturedStmt()); } void CodeGenFunction::EmitOMPCancellationPointDirective( |