summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
author Carlo Bertolli <cbertol@us.ibm.com> 2016-04-04 15:55:02 +0000
committer Carlo Bertolli <cbertol@us.ibm.com> 2016-04-04 15:55:02 +0000
commit c687225b436cd0ff0a6061a6f79f13c35e6b6228 (patch)
tree 71f1428bdf6f4138fd2c53ebe76960b814fc4ec2 /clang/lib/CodeGen
parent 1eec3f01f07c88325f9958b85dee8d33f548af52 (diff)
download bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.tar.gz
bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.zip
[OPENMP] Codegen for teams directive for NVPTX
This patch implements the teams directive for the NVPTX backend. It differs from the host code generation path in two ways: (1) It does not call kmpc_fork_teams. All necessary teams and threads are started upon touching the target region, when launching a CUDA kernel, and their execution is coordinated through sequential and parallel regions within the target region. (2) It does not call kmpc_push_num_teams even if a num_teams or thread_limit clause is present. Setting the number of teams and the thread limit is implemented by the nvptx-related runtime. Please note that I am now passing a Clang Expr * to emitNumTeamsClause instead of the originally chosen llvm::Value * type. The reason is that I want to avoid emitting the expressions for num_teams and thread_limit if they are not needed in the target region. http://reviews.llvm.org/D17963 llvm-svn: 265304
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntime.cpp 20
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntime.h 9
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp 44
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h 35
-rw-r--r-- clang/lib/CodeGen/CGStmtOpenMP.cpp 17
5 files changed, 104 insertions, 21 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 33528b50a18..6865a84ffce 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -4832,17 +4832,29 @@ void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
}
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
- llvm::Value *NumTeams,
- llvm::Value *ThreadLimit,
+ const Expr *NumTeams,
+ const Expr *ThreadLimit,
SourceLocation Loc) {
if (!CGF.HaveInsertPoint())
return;
auto *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *NumTeamsVal =
+ (NumTeams)
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
+ CGF.CGM.Int32Ty, /* isSigned = */ true)
+ : CGF.Builder.getInt32(0);
+
+ llvm::Value *ThreadLimitVal =
+ (ThreadLimit)
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
+ CGF.CGM.Int32Ty, /* isSigned = */ true)
+ : CGF.Builder.getInt32(0);
+
// Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
- llvm::Value *PushNumTeamsArgs[] = {
- RTLoc, getThreadID(CGF, Loc), NumTeams, ThreadLimit};
+ llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
+ ThreadLimitVal};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
PushNumTeamsArgs);
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index d2a1a243a8c..019461842df 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -912,11 +912,10 @@ public:
/// \brief Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_teams, kmp_int32 thread_limit) to generate code
/// for num_teams clause.
- /// \param NumTeams An integer value of teams.
- /// \param ThreadLimit An integer value of threads.
- virtual void emitNumTeamsClause(CodeGenFunction &CGF, llvm::Value *NumTeams,
- llvm::Value *ThreadLimit, SourceLocation Loc);
-
+ /// \param NumTeams An integer expression of teams.
+ /// \param ThreadLimit An integer expression of threads.
+ virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+ const Expr *ThreadLimit, SourceLocation Loc);
};
} // namespace CodeGen
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 3b2b30cdb73..d64f6df7201 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -14,6 +14,8 @@
#include "CGOpenMPRuntimeNVPTX.h"
#include "clang/AST/DeclOpenMP.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
using namespace clang;
using namespace CodeGen;
@@ -350,3 +352,45 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
// Called once per module during initialization.
initializeEnvironment();
}
+
+void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
+ const Expr *NumTeams,
+ const Expr *ThreadLimit,
+ SourceLocation Loc) {}
+
+llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction(
+ const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+
+ llvm::Function *OutlinedFun = nullptr;
+ if (isa<OMPTeamsDirective>(D)) {
+ llvm::Value *OutlinedFunVal =
+ CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
+ D, ThreadIDVar, InnermostKind, CodeGen);
+ OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
+ OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+ } else
+ llvm_unreachable("parallel directive is not yet supported for nvptx "
+ "backend.");
+
+ return OutlinedFun;
+}
+
+void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
+ const OMPExecutableDirective &D,
+ SourceLocation Loc,
+ llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) {
+ if (!CGF.HaveInsertPoint())
+ return;
+
+ Address ZeroAddr =
+ CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
+ /*Name*/ ".zero.addr");
+ CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+ llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.push_back(ZeroAddr.getPointer());
+ OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+ CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
+}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 6ef3d3c08bd..a6c64b2f6d6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -136,6 +136,41 @@ private:
public:
explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM);
+
+ /// \brief This function ought to emit, in the general case, a call to
+ /// the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed
+ /// as these numbers are obtained through the PTX grid and block configuration.
+ /// \param NumTeams An integer expression of teams.
+ /// \param ThreadLimit An integer expression of threads.
+ void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams,
+ const Expr *ThreadLimit, SourceLocation Loc) override;
+
+ /// \brief Emits outlined function for the specified OpenMP teams
+ /// directive \a D and marks it always-inline; parallel is not yet supported.
+ /// This outlined function has type void(*)(kmp_int32 *ThreadID,
+ /// kmp_int32 BoundID, struct context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ /// \param InnermostKind Kind of innermost directive (for simple directives it
+ /// is a directive itself, for combined - its innermost directive).
+ /// \param CodeGen Code generation sequence for the \a D directive.
+ llvm::Value *
+ emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar,
+ OpenMPDirectiveKind InnermostKind,
+ const RegionCodeGenTy &CodeGen) override;
+
+ /// \brief Emits code for teams call of the \a OutlinedFn with
+ /// the captured variables listed in \a CapturedVars passed as its
+ /// trailing arguments.
+ /// \param OutlinedFn Outlined function to be run by team masters. Type of
+ /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*).
+ /// \param CapturedVars The captured variables that are passed as
+ /// arguments to the \a OutlinedFn function.
+ ///
+ void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D,
+ SourceLocation Loc, llvm::Value *OutlinedFn,
+ ArrayRef<llvm::Value *> CapturedVars) override;
};
} // CodeGen namespace.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 84059ab2912..bd208cb4df5 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3098,18 +3098,11 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
const OMPNumTeamsClause *NT = TD.getSingleClause<OMPNumTeamsClause>();
const OMPThreadLimitClause *TL = TD.getSingleClause<OMPThreadLimitClause>();
if (NT || TL) {
- llvm::Value *NumTeamsVal = (NT) ? CGF.Builder.CreateIntCast(
- CGF.EmitScalarExpr(NT->getNumTeams()), CGF.CGM.Int32Ty,
- /* isSigned = */ true) :
- CGF.Builder.getInt32(0);
-
- llvm::Value *ThreadLimitVal = (TL) ? CGF.Builder.CreateIntCast(
- CGF.EmitScalarExpr(TL->getThreadLimit()), CGF.CGM.Int32Ty,
- /* isSigned = */ true) :
- CGF.Builder.getInt32(0);
-
- CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeamsVal,
- ThreadLimitVal, S.getLocStart());
+ Expr *NumTeams = (NT) ? NT->getNumTeams() : nullptr;
+ Expr *ThreadLimit = (TL) ? TL->getThreadLimit() : nullptr;
+
+ CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
+ S.getLocStart());
}
OMPLexicalScope Scope(CGF, S);
OpenPOWER on IntegriCloud