diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-01-03 16:25:35 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-01-03 16:25:35 +0000 |
commit | a3924b517e2d10c25ee3b41dfd96bb9a5333b64e (patch) | |
tree | c616eef5a288481766e0d99333a0f7e00380a83b /clang/lib | |
parent | 16228bc65e46de9045d522dfe7afbba8c4bb5955 (diff) | |
download | bcm5719-llvm-a3924b517e2d10c25ee3b41dfd96bb9a5333b64e.tar.gz bcm5719-llvm-a3924b517e2d10c25ee3b41dfd96bb9a5333b64e.zip |
[OPENMP][NVPTX]Use __kmpc_barrier_simple_spmd(nullptr, 0) instead of
nvvm_barrier0.
Use runtime functions instead of direct calls to the nvvm intrinsics.
This prevents some dangerous LLVM optimizations that break the
code for the NVPTX target.
llvm-svn: 350328
Diffstat (limited to 'clang/lib')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 41 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 3 |
2 files changed, 32 insertions, 12 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 5e2676bf167..97b8f79a9f9 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -96,8 +96,11 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_get_team_static_memory, /// Call to void __kmpc_restore_team_static_memory(int16_t is_shared); OMPRTL_NVPTX__kmpc_restore_team_static_memory, - // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); OMPRTL__kmpc_barrier, + /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 + /// global_tid); + OMPRTL__kmpc_barrier_simple_spmd, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -640,17 +643,6 @@ static llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF) { "nvptx_num_threads"); } -/// Get barrier to synchronize all threads in a block. -static void getNVPTXCTABarrier(CodeGenFunction &CGF) { - llvm::Function *F = llvm::Intrinsic::getDeclaration( - &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_barrier0); - F->addFnAttr(llvm::Attribute::Convergent); - CGF.EmitRuntimeCall(F); -} - -/// Synchronize all GPU threads in a block. -static void syncCTAThreads(CodeGenFunction &CGF) { getNVPTXCTABarrier(CGF); } - /// Get the value of the thread_limit clause in the teams directive. 
/// For the 'generic' execution mode, the runtime encodes thread_limit in /// the launch parameters, always starting thread_limit+warpSize threads per @@ -1813,6 +1805,17 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); break; } + case OMPRTL__kmpc_barrier_simple_spmd: { + // Build void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 + // global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier_simple_spmd"); + cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + break; + } } return RTLFn; } @@ -2665,6 +2668,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( } } +void CGOpenMPRuntimeNVPTX::syncCTAThreads(CodeGenFunction &CGF) { + // Always emit simple barriers! + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_barrier_simple_spmd(nullptr, 0); + // This function does not use parameters, so we can emit just default values. + llvm::Value *Args[] = { + llvm::ConstantPointerNull::get( + cast<llvm::PointerType>(getIdentTyPointerTy())), + llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/0, /*isSigned=*/true)}; + CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier_simple_spmd), Args); +} + void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 8fb3b0a0610..6091610c37e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -58,6 +58,9 @@ private: bool requiresFullRuntime() const { return RequiresFullRuntime; } + /// Get barrier to synchronize all threads in a block. 
+ void syncCTAThreads(CodeGenFunction &CGF); + /// Emit the worker function for the current target region. void emitWorkerFunction(WorkerFunctionState &WST); |