diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-03-23 06:18:07 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-03-23 06:18:07 +0000 |
commit | a63048e4fd2936e600a1268651a4f2ad981309d0 (patch) | |
tree | 571fb385d651af11e25506fec4590f3aaca5ecb4 /clang/lib/CodeGen | |
parent | 1565992679f173ca9517637ac3aeca4636b624ff (diff) | |
download | bcm5719-llvm-a63048e4fd2936e600a1268651a4f2ad981309d0.tar.gz bcm5719-llvm-a63048e4fd2936e600a1268651a4f2ad981309d0.zip |
[OPENMP] Codegen for 'copyprivate' clause ('single' directive).
If there is at least one 'copyprivate' clause is associated with the single directive, the following code is generated:
```
i32 did_it = 0; \\ for 'copyprivate' clause
if(__kmpc_single(ident_t *, gtid)) {
SingleOpGen();
__kmpc_end_single(ident_t *, gtid);
did_it = 1; \\ for 'copyprivate' clause
}
<copyprivate_list>[0] = &var0;
...
<copyprivate_list>[n] = &varn;
call __kmpc_copyprivate(ident_t *, gtid, <copyprivate_list_size>,
<copyprivate_list>, <copy_func>, did_it);
...
void<copy_func>(void *LHSArg, void *RHSArg) {
Dst = (void * [n])(LHSArg);
Src = (void * [n])(RHSArg);
Dst[0] = Src[0];
... Dst[n] = Src[n];
}
```
All list items from all 'copyprivate' clauses are gathered into single <copyprivate list> (<copyprivate_list_size> is a size in bytes of this list) and <copy_func> is used to propagate values of private or threadprivate variables from the 'single' region to other implicit threads from outer 'parallel' region.
Differential Revision: http://reviews.llvm.org/D8410
llvm-svn: 232932
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 143 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 10 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 31 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 5 |
4 files changed, 184 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 25755838013..ef2d2147aca 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -580,6 +580,21 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task"); break; } + case OMPRTL__kmpc_copyprivate: { + // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), + // kmp_int32 didit); + llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto *CpyFnTy = + llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, + CGM.VoidPtrTy, CpyFnTy->getPointerTo(), + CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); + break; + } } return RTLFn; } @@ -965,19 +980,107 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); } +static llvm::Value *emitCopyprivateCopyFunction( + CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> SrcExprs, + ArrayRef<const Expr *> DstExprs, ArrayRef<const Expr *> AssignmentOps) { + auto &C = CGM.getContext(); + // void copy_func(void *LHSArg, void *RHSArg); + FunctionArgList Args; + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, + C.VoidPtrTy); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, + C.VoidPtrTy); + Args.push_back(&LHSArg); + Args.push_back(&RHSArg); + FunctionType::ExtInfo EI; + auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, Args, EI, /*isVariadic=*/false); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + ".omp.copyprivate.copy_func", &CGM.getModule()); + CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + // Dst = (void*[n])(LHSArg); + // Src = (void*[n])(RHSArg); + auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), + CGF.PointerAlignInBytes), + ArgsType); + auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), + CGF.PointerAlignInBytes), + ArgsType); + // *(Type0*)Dst[0] = *(Type0*)Src[0]; + // *(Type1*)Dst[1] = *(Type1*)Src[1]; + // ... + // *(Typen*)Dst[n] = *(Typen*)Src[n]; + CodeGenFunction::OMPPrivateScope Scope(CGF); + for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { + Scope.addPrivate( + cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()), + [&]() -> llvm::Value *{ + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(RHS, I), + CGM.PointerAlignInBytes), + CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); + }); + Scope.addPrivate( + cast<VarDecl>(cast<DeclRefExpr>(DstExprs[I])->getDecl()), + [&]() -> llvm::Value *{ + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(LHS, I), + CGM.PointerAlignInBytes), + CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType()))); + }); + } + Scope.Privatize(); + for (auto *E : AssignmentOps) { + CGF.EmitIgnoredExpr(E); + } + Scope.ForceCleanup(); + CGF.FinishFunction(); + return Fn; +} + void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, const std::function<void()> &SingleOpGen, - SourceLocation Loc) { + SourceLocation Loc, + ArrayRef<const Expr *> CopyprivateVars, + ArrayRef<const Expr *> SrcExprs, + ArrayRef<const Expr *> DstExprs, + ArrayRef<const Expr *> AssignmentOps) { + assert(CopyprivateVars.size() == SrcExprs.size() && + CopyprivateVars.size() == DstExprs.size() && + CopyprivateVars.size() == AssignmentOps.size()); + auto &C = CGM.getContext(); + // int32 did_it = 0; // if(__kmpc_single(ident_t *, gtid)) { // SingleOpGen(); // __kmpc_end_single(ident_t *, gtid); + // did_it = 1; // } + // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, + // <copy_func>, did_it); + + llvm::AllocaInst *DidIt = nullptr; + if (!CopyprivateVars.empty()) { + // int32 did_it = 0; + auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); + CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0)); + } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; auto *IsSingle = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); emitIfStmt(CGF, IsSingle, [&]() -> void { SingleOpGen(); + if (DidIt) { + // did_it = 1; + CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, + DidIt->getAlignment()); + } // Build a call to __kmpc_end_single. // OpenMP [1.2.2 OpenMP Language Terminology] // For C/C++, an executable statement, possibly compound, with a single @@ -994,6 +1097,44 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // fallthrough rather than pushing a normal cleanup for it. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args); }); + // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, + // <copy_func>, did_it); + if (DidIt) { + llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); + auto CopyprivateArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + // Create a list of all private variables for copyprivate. + auto *CopyprivateList = + CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); + for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { + auto *Elem = CGF.Builder.CreateStructGEP(CopyprivateList, I); + CGF.Builder.CreateAlignedStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy), + Elem, CGM.PointerAlignInBytes); + } + // Build function that copies private values from single region to all other + // threads in the corresponding parallel region. + auto *CpyFn = emitCopyprivateCopyFunction( + CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), + SrcExprs, DstExprs, AssignmentOps); + auto *BufSize = CGF.Builder.getInt32( + C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity()); + auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, + CGF.VoidPtrTy); + auto *DidItVal = + CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc), // ident_t *<loc> + getThreadID(CGF, Loc), // i32 <gtid> + BufSize, // i32 <buf_size> + CL, // void *<copyprivate list> + CpyFn, // void (*) (void *, void *) <copy_func> + DidItVal // i32 did_it + }; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args); + } } void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index a3f957045d7..f8849e627c8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -96,6 +96,10 @@ class CGOpenMPRuntime { // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t * // new_task); OMPRTL__kmpc_omp_task, + // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), + // kmp_int32 didit); + OMPRTL__kmpc_copyprivate, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -348,7 +352,11 @@ public: /// single region. virtual void emitSingleRegion(CodeGenFunction &CGF, const std::function<void()> &SingleOpGen, - SourceLocation Loc); + SourceLocation Loc, + ArrayRef<const Expr *> CopyprivateVars, + ArrayRef<const Expr *> SrcExprs, + ArrayRef<const Expr *> DstExprs, + ArrayRef<const Expr *> AssignmentOps); /// \brief Emits explicit barrier for OpenMP threads. /// \param IsExplicit true, if it is explicitly specified barrier. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 42e6a841cdf..9af74749f17 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -882,7 +882,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { InlinedOpenMPRegionScopeRAII Region(*this, S); EmitStmt(Stmt); EnsureInsertPoint(); - }, S.getLocStart()); + }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None); } // Emit an implicit barrier at the end. @@ -898,11 +898,38 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { } void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { + llvm::SmallVector<const Expr *, 8> CopyprivateVars; + llvm::SmallVector<const Expr *, 8> SrcExprs; + llvm::SmallVector<const Expr *, 8> DstExprs; + llvm::SmallVector<const Expr *, 8> AssignmentOps; + // Check if there are any 'copyprivate' clauses associated with this 'single' + // construct. + auto CopyprivateFilter = [](const OMPClause *C) -> bool { + return C->getClauseKind() == OMPC_copyprivate; + }; + // Build a list of copyprivate variables along with helper expressions + // (<source>, <destination>, <destination>=<source> expressions) + typedef OMPExecutableDirective::filtered_clause_iterator<decltype( + CopyprivateFilter)> CopyprivateIter; + for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) { + auto *C = cast<OMPCopyprivateClause>(*I); + CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); + SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); + DstExprs.append(C->destination_exprs().begin(), + C->destination_exprs().end()); + AssignmentOps.append(C->assignment_ops().begin(), + C->assignment_ops().end()); + } + // Emit code for 'single' region along with 'copyprivate' clauses CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void { InlinedOpenMPRegionScopeRAII Region(*this, S); EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); EnsureInsertPoint(); - }, S.getLocStart()); + }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps); + // Emit an implicit barrier at the end. + if (!S.getSingleClause(OMPC_nowait)) + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(), + /*IsExplicit=*/false); } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 474668abf89..151eb7e48d9 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -604,7 +604,10 @@ public: } /// \brief Exit scope - all the mapped variables are restored. - ~OMPPrivateScope() { ForceCleanup(); } + ~OMPPrivateScope() { + if (PerformCleanup) + ForceCleanup(); + } }; /// \brief Takes the old cleanup stack size and emits the cleanup blocks |