diff options
Diffstat (limited to 'clang')
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 19 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 4 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 25 | ||||
| -rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 12 | ||||
| -rw-r--r-- | clang/test/OpenMP/nvptx_parallel_codegen.cpp | 10 |
5 files changed, 61 insertions, 9 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index cade093941a..66f0783e27d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3214,13 +3214,7 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } -void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, - OpenMPDirectiveKind Kind, bool EmitChecks, - bool ForceSimpleCall) { - if (!CGF.HaveInsertPoint()) - return; - // Build call __kmpc_cancel_barrier(loc, thread_id); - // Build call __kmpc_barrier(loc, thread_id); +unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { unsigned Flags; if (Kind == OMPD_for) Flags = OMP_IDENT_BARRIER_IMPL_FOR; @@ -3232,6 +3226,17 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Flags = OMP_IDENT_BARRIER_EXPL; else Flags = OMP_IDENT_BARRIER_IMPL; + return Flags; +} + +void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks, + bool ForceSimpleCall) { + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + // Build call __kmpc_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, // thread_id); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index d3326234143..f4a8a97970e 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -290,6 +290,10 @@ protected: /// default location. virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + /// Returns default flags for the barriers depending on the directive, for + /// which this barier is going to be emitted. + static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); + /// Get the LLVM type for the critical name. llvm::ArrayType *getKmpCriticalNameTy() const {return KmpCriticalNameTy;} diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 2bc98f6178b..cf814a4b203 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -96,6 +96,8 @@ enum OpenMPRTLFunctionNVPTX { OMPRTL_NVPTX__kmpc_get_team_static_memory, /// Call to void __kmpc_restore_team_static_memory(int16_t is_shared); OMPRTL_NVPTX__kmpc_restore_team_static_memory, + // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + OMPRTL__kmpc_barrier, }; /// Pre(post)-action for different OpenMP constructs specialized for NVPTX. @@ -1824,6 +1826,15 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { CGM.CreateRuntimeFunction(FnTy, "__kmpc_restore_team_static_memory"); break; } + case OMPRTL__kmpc_barrier: { + // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + auto *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier"); + cast<llvm::Function>(RTLFn)->addFnAttr(llvm::Attribute::Convergent); + break; + } } return RTLFn; } @@ -2676,6 +2687,20 @@ void CGOpenMPRuntimeNVPTX::emitSPMDParallelCall( } } +void CGOpenMPRuntimeNVPTX::emitBarrierCall(CodeGenFunction &CGF, + SourceLocation Loc, + OpenMPDirectiveKind Kind, bool, + bool) { + // Always emit simple barriers! + if (!CGF.HaveInsertPoint()) + return; + // Build call __kmpc_cancel_barrier(loc, thread_id); + unsigned Flags = getDefaultFlagsForBarriers(Kind); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), + getThreadID(CGF, Loc)}; + CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(OMPRTL__kmpc_barrier), Args); +} + void CGOpenMPRuntimeNVPTX::emitCriticalRegion( CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 8ba2759c8b3..8fb3b0a0610 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -274,6 +274,18 @@ public: ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) override; + /// Emit an implicit/explicit barrier for OpenMP threads. + /// \param Kind Directive for which this implicit barrier call must be + /// generated. Must be OMPD_barrier for explicit barrier generation. + /// \param EmitChecks true if need to emit checks for cancellation barriers. + /// \param ForceSimpleCall true simple barrier call must be emitted, false if + /// runtime class decides which one to emit (simple or with cancellation + /// checks). + /// + void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, + OpenMPDirectiveKind Kind, bool EmitChecks = true, + bool ForceSimpleCall = false) override; + /// Emits a critical region. /// \param CriticalName Name of the critical region. /// \param CriticalOpGen Generator for the statement associated with the given diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index 2fd837c92b2..08431fccc00 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -45,6 +45,7 @@ tx ftemplate(int n) { #pragma omp parallel if(n>1000) { int a = 45; +#pragma omp barrier } a += 1; aa += 1; @@ -317,10 +318,13 @@ int bar(int n){ // CHECK: define internal void [[PARALLEL_FN4]]( // CHECK: [[A:%.+]] = alloca i[[SZ:32|64]], // CHECK: store i[[SZ]] 45, i[[SZ]]* %a, +// CHECK: call void @__kmpc_barrier(%struct.ident_t* @{{.+}}, i32 %{{.+}}) // CHECK: ret void -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}_worker() -// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l54}}( +// CHECK: declare void @__kmpc_barrier(%struct.ident_t*, i32) #[[BARRIER_ATTRS:.+]] + +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}_worker() +// CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l55}}( // CHECK-32: [[A_ADDR:%.+]] = alloca i32, // CHECK-64: [[A_ADDR:%.+]] = alloca i64, // CHECK-64: [[CONV:%.+]] = bitcast i64* [[A_ADDR]] to i32* @@ -357,4 +361,6 @@ int bar(int n){ // CHECK: store i32 [[NEW_CC_VAL]], i32* [[CC]], // CHECK: br label +// CHECK: attributes #[[BARRIER_ATTRS]] = {{.*}} convergent {{.*}} + #endif |

