[OPENMP] Codegen for teams directive for NVPTX

This patch implements the teams directive for the NVPTX backend. It differs from the host code generation path in two ways: (1) It does not call kmpc_fork_teams. All necessary teams and threads are started upon touching the target region, when launching a CUDA kernel, and their execution is coordinated through sequential and parallel regions within the target region. (2) It does not call kmpc_push_num_teams even if a num_teams or thread_limit clause is present. Setting the number of teams and the thread limit is implemented by the nvptx-related runtime. Please note that I am now passing a Clang Expr * to emitPushNumTeams instead of the originally chosen llvm::Value * type. The reason for that is that I want to avoid emitting expressions for num_teams and thread_limit if they are not needed in the target region. http://reviews.llvm.org/D17963 llvm-svn: 265304
author: Carlo Bertolli <cbertol@us.ibm.com> 2016-04-04 15:55:02 +0000
committer: Carlo Bertolli <cbertol@us.ibm.com> 2016-04-04 15:55:02 +0000
commit: c687225b436cd0ff0a6061a6f79f13c35e6b6228 (patch)
tree: 71f1428bdf6f4138fd2c53ebe76960b814fc4ec2 /clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
parent: 1eec3f01f07c88325f9958b85dee8d33f548af52 (diff)
download: bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.tar.gz
bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.zip
1 files changed, 44 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 3b2b30cdb73..d64f6df7201 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -14,6 +14,8 @@
 
 #include "CGOpenMPRuntimeNVPTX.h"
 #include "clang/AST/DeclOpenMP.h"
+#include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
 
 using namespace clang;
 using namespace CodeGen;
@@ -350,3 +352,45 @@ CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM)
   // Called once per module during initialization.
   initializeEnvironment();
 }
+
+void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
+                                              const Expr *NumTeams, // Unused: no code is emitted for it.
+                                              const Expr *ThreadLimit, // Unused: no code is emitted for it.
+                                              SourceLocation Loc) {} // Empty body: this override is a no-op.
+
+llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOrTeamsOutlinedFunction(
+    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
+    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+  // Teams regions reuse the base-class outlining and are marked AlwaysInline.
+  llvm::Function *OutlinedFun = nullptr;
+  if (isa<OMPTeamsDirective>(D)) {
+    llvm::Value *OutlinedFunVal =
+        CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
+            D, ThreadIDVar, InnermostKind, CodeGen);
+    OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
+    OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline);
+  } else // Any directive other than OMPTeamsDirective lands here.
+    llvm_unreachable("parallel directive is not yet supported for nvptx "
+                     "backend.");
+  // Reached only on the teams path; the else branch calls llvm_unreachable.
+  return OutlinedFun;
+}
+
+void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
+                                         const OMPExecutableDirective &D,
+                                         SourceLocation Loc,
+                                         llvm::Value *OutlinedFn,
+                                         ArrayRef<llvm::Value *> CapturedVars) {
+  if (!CGF.HaveInsertPoint())
+    return; // Nothing is emitted when there is no insertion point.
+  // Args: &zero (i32 temp initialized to 0) twice, then CapturedVars in order.
+  Address ZeroAddr =
+      CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
+                           /*Name*/ ".zero.addr");
+  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
+  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer()); // 1st argument: address of the zero temp.
+  OutlinedFnArgs.push_back(ZeroAddr.getPointer()); // 2nd argument: the same address again.
+  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // Direct call to OutlinedFn with the assembled arguments.
+}
author	Carlo Bertolli <cbertol@us.ibm.com>	2016-04-04 15:55:02 +0000
committer	Carlo Bertolli <cbertol@us.ibm.com>	2016-04-04 15:55:02 +0000
commit	c687225b436cd0ff0a6061a6f79f13c35e6b6228 (patch)
tree	71f1428bdf6f4138fd2c53ebe76960b814fc4ec2 /clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
parent	1eec3f01f07c88325f9958b85dee8d33f548af52 (diff)
download	bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.tar.gz bcm5719-llvm-c687225b436cd0ff0a6061a6f79f13c35e6b6228.zip