diff options
author | Carlo Bertolli <cbertol@us.ibm.com> | 2016-04-04 15:55:02 +0000 |
---|---|---|
committer | Carlo Bertolli <cbertol@us.ibm.com> | 2016-04-04 15:55:02 +0000 |
commit | c687225b436cd0ff0a6061a6f79f13c35e6b6228 (patch) | |
tree | 71f1428bdf6f4138fd2c53ebe76960b814fc4ec2 /clang/lib | |
parent | 1eec3f01f07c88325f9958b85dee8d33f548af52 (diff) | |
download | bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.tar.gz bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.zip |
[OPENMP] Codegen for teams directive for NVPTX
This patch implements the teams directive for the NVPTX backend. It is different from the host code generation path as it:
Does not call kmpc_fork_teams. All necessary teams and threads are started upon touching the target region, when launching a CUDA kernel, and their execution is coordinated through sequential and parallel regions within the target region.
Does not call kmpc_push_num_teams even if a num_teams or thread_limit clause is present. Setting the number of teams and the thread limit is implemented by the nvptx-related runtime.
Please note that I am now passing a Clang Expr * to emitNumTeamsClause instead of the originally chosen llvm::Value * type. The reason is that I want to avoid emitting the num_teams and thread_limit expressions when they are not needed in the target region.
http://reviews.llvm.org/D17963
llvm-svn: 265304
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 20 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 9 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 44 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 35 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 17 |
5 files changed, 104 insertions, 21 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 33528b50a18..6865a84ffce 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4832,17 +4832,29 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, - llvm::Value *NumTeams, - llvm::Value *ThreadLimit, + const Expr *NumTeams, + const Expr *ThreadLimit, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *NumTeamsVal = + (NumTeams) + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + llvm::Value *ThreadLimitVal = + (ThreadLimit) + ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit) - llvm::Value *PushNumTeamsArgs[] = { - RTLoc, getThreadID(CGF, Loc), NumTeams, ThreadLimit}; + llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal, + ThreadLimitVal}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams), PushNumTeamsArgs); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index d2a1a243a8c..019461842df 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -912,11 +912,10 @@ public: /// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 /// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code /// for num_teams clause. - /// \param NumTeams An integer value of teams. - /// \param ThreadLimit An integer value of threads. 
- virtual void emitNumTeamsClause(CodeGenFunction &CGF, llvm::Value *NumTeams, - llvm::Value *ThreadLimit, SourceLocation Loc); - + /// \param NumTeams An integer expression of teams. + /// \param ThreadLimit An integer expression of threads. + virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, + const Expr *ThreadLimit, SourceLocation Loc); }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 3b2b30cdb73..d64f6df7201 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -14,6 +14,8 @@ #include "CGOpenMPRuntimeNVPTX.h" #include "clang/AST/DeclOpenMP.h" +#include "CodeGenFunction.h" +#include "clang/AST/StmtOpenMP.h" using namespace clang; using namespace CodeGen; @@ -350,3 +352,45 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) // Called once per module during initialization. initializeEnvironment(); } + +void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF, + const Expr *NumTeams, + const Expr *ThreadLimit, + SourceLocation Loc) {} + +llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction( + const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + + llvm::Function *OutlinedFun = nullptr; + if (isa<OMPTeamsDirective>(D)) { + llvm::Value *OutlinedFunVal = + CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( + D, ThreadIDVar, InnermostKind, CodeGen); + OutlinedFun = cast<llvm::Function>(OutlinedFunVal); + OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); + } else + llvm_unreachable("parallel directive is not yet supported for nvptx " + "backend."); + + return OutlinedFun; +} + +void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF, + const OMPExecutableDirective &D, + SourceLocation Loc, + llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) { + if 
(!CGF.HaveInsertPoint()) + return; + + Address ZeroAddr = + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); + CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); + CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); +} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 6ef3d3c08bd..a6c64b2f6d6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -136,6 +136,41 @@ private: public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); + + /// \brief This function ought to emit, in the general case, a call to + // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed + // as these numbers are obtained through the PTX grid and block configuration. + /// \param NumTeams An integer expression of teams. + /// \param ThreadLimit An integer expression of threads. + void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, + const Expr *ThreadLimit, SourceLocation Loc) override; + + /// \brief Emits inlined function for the specified OpenMP parallel + // directive but an inlined function for teams. + /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, + /// kmp_int32 BoundID, struct context_vars*). + /// \param D OpenMP directive. + /// \param ThreadIDVar Variable for thread id in the current OpenMP region. + /// \param InnermostKind Kind of innermost directive (for simple directives it + /// is a directive itself, for combined - its innermost directive). + /// \param CodeGen Code generation sequence for the \a D directive. 
+ llvm::Value * + emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, + OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) override; + + /// \brief Emits code for teams call of the \a OutlinedFn with + /// variables captured in a record which address is stored in \a + /// CapturedStruct. + /// \param OutlinedFn Outlined function to be run by team masters. Type of + /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). + /// \param CapturedVars A pointer to the record with the references to + /// variables used in \a OutlinedFn function. + /// + void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, + SourceLocation Loc, llvm::Value *OutlinedFn, + ArrayRef<llvm::Value *> CapturedVars) override; }; } // CodeGen namespace. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 84059ab2912..bd208cb4df5 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -3098,18 +3098,11 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>(); const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>(); if (NT || TL) { - llvm::Value *NumTeamsVal = (NT) ? CGF.Builder.CreateIntCast( - CGF.EmitScalarExpr(NT->getNumTeams()), CGF.CGM.Int32Ty, - /* isSigned = */ true) : - CGF.Builder.getInt32(0); - - llvm::Value *ThreadLimitVal = (TL) ? CGF.Builder.CreateIntCast( - CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty, - /* isSigned = */ true) : - CGF.Builder.getInt32(0); - - CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal, - ThreadLimitVal, S.getLocStart()); + Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr; + Expr *ThreadLimit = (TL) ? 
TL->getThreadLimit() : nullptr; + + CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, + S.getLocStart()); } OMPLexicalScope Scope(CGF, S); |