diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-08-26 19:07:48 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-08-26 19:07:48 +0000 |
commit | 2cd7fafc11fc1f721e88257d9befcc36ccf54294 (patch) | |
tree | 6d4f50f7a8475eef12539aae8257b010030ce06b /clang/lib/CodeGen | |
parent | b504eb8bb5ed952533a4fbb113e5a63ba6fd859d (diff) | |
download | bcm5719-llvm-2cd7fafc11fc1f721e88257d9befcc36ccf54294.tar.gz bcm5719-llvm-2cd7fafc11fc1f721e88257d9befcc36ccf54294.zip |
[OPENMP][NVPTX]Fix critical region codegen.
Summary:
Previously critical regions were emitted with the barrier making it a
worksharing construct though it is not. Also, it leads to incorrect
behavior in Cuda9+. Patch fixes this problem.
Reviewers: ABataev, jdoerfert
Subscribers: jholewinski, guansong, cfe-commits, grokos
Tags: #clang
Differential Revision: https://reviews.llvm.org/D66673
llvm-svn: 369946
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 26 |
1 files changed, 24 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 284d14b9958..6c908412e0c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -107,6 +107,10 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_barrier_simple_spmd(ident_t *loc, kmp_int32 /// global_tid); OMPRTL__kmpc_barrier_simple_spmd, + /// Call to int32_t __kmpc_warp_active_thread_mask(void); + OMPRTL_NVPTX__kmpc_warp_active_thread_mask, + /// Call to void __kmpc_syncwarp(int32_t Mask); + OMPRTL_NVPTX__kmpc_syncwarp, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -1794,6 +1798,20 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { ->addFnAttr(llvm::Attribute::Convergent); break; } + case OMPRTL_NVPTX__kmpc_warp_active_thread_mask: { + // Build int32_t __kmpc_warp_active_thread_mask(void); + auto *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, llvm::None, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_warp_active_thread_mask"); + break; + } + case OMPRTL_NVPTX__kmpc_syncwarp: { + // Build void __kmpc_syncwarp(kmp_int32 Mask); + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.Int32Ty, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_syncwarp"); + break; + } } return RTLFn; } @@ -2700,6 +2718,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.critical.body"); llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.critical.exit"); + // Get the mask of active threads in the warp. + llvm::Value *Mask = CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_warp_active_thread_mask)); // Fetch team-local id of the thread. llvm::Value *ThreadID = getNVPTXThreadID(CGF); @@ -2740,8 +2761,9 @@ void CGOpenMPRuntimeNVPTX::emitCriticalRegion( // Block waits for all threads in current team to finish then increments the // counter variable and returns to the loop. CGF.EmitBlock(SyncBB); - emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false, - /*ForceSimpleCall=*/true); + // Reconverge active threads in the warp. + (void)CGF.EmitRuntimeCall( + createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_syncwarp), Mask); llvm::Value *IncCounterVal = CGF.Builder.CreateNSWAdd(CounterVal, CGF.Builder.getInt32(1)); |