summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp41
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h3
2 files changed, 32 insertions, 12 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 5e2676bf167..97b8f79a9f9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -96,8 +96,11 @@ enum OpenMPRTLFunctionNVPTX {
OMPRTL_NVPTX__kmpc_get_team_static_memory,
/// Call to void __kmpc_restore_team_static_memory(int16_t is_shared);
OMPRTL_NVPTX__kmpc_restore_team_static_memory,
- // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
+ /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
OMPRTL__kmpc_barrier,
+ /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ /// global_tid);
+ OMPRTL__kmpc_barrier_simple_spmd,
};
/// Pre(post)-action for different OpenMP constructs specialized for NVPTX.
@@ -640,17 +643,6 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) {
"nvptx_num_threads");
}
-/// Get barrier to synchronize all threads in a block.
-static void getNVPTXCTABarrier(CodeGenFunction &CGF) {
- llvm::Function *F = llvm::Intrinsic::getDeclaration(
- &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0);
- F->addFnAttr(llvm::Attribute::Convergent);
- CGF.EmitRuntimeCall(F);
-}
-
-/// Synchronize all GPU threads in a block.
-static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); }
-
/// Get the value of the thread_limit clause in the teams directive.
/// For the 'generic' execution mode, the runtime encodes thread_limit in
/// the launch parameters, always starting thread_limit+warpSize threads per
@@ -1813,6 +1805,17 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) {
cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
break;
}
+ case OMPRTL__kmpc_barrier_simple_spmd: {
+ // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32
+ // global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd");
+ cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent);
+ break;
+ }
}
return RTLFn;
}
@@ -2665,6 +2668,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall(
}
}
+void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) {
+ // Always emit simple barriers!
+ if (!CGF.HaveInsertPoint())
+ return;
+ // Build call __kmpc_barrier_simple_spmd(nullptr, 0);
+ // This function does not use parameters, so we can emit just default values.
+ llvm::Value *Args[] = {
+ llvm::ConstantPointerNull::get(
+ cast<llvm::PointerType>(getIdentTyPointerTy())),
+ llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)};
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args);
+}
+
void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF,
SourceLocation Loc,
OpenMPDirectiveKind Kind, bool,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 8fb3b0a0610..6091610c37e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -58,6 +58,9 @@ private:
bool requiresFullRuntime() const { return RequiresFullRuntime; }
+ /// Get barrier to synchronize all threads in a block.
+ void syncCTAThreads(CodeGenFunction &CGF);
+
/// Emit the worker function for the current target region.
void emitWorkerFunction(WorkerFunctionState &WST);
OpenPOWER on IntegriCloud