summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2015-03-23 06:18:07 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2015-03-23 06:18:07 +0000
commita63048e4fd2936e600a1268651a4f2ad981309d0 (patch)
tree571fb385d651af11e25506fec4590f3aaca5ecb4 /clang/lib/CodeGen
parent1565992679f173ca9517637ac3aeca4636b624ff (diff)
downloadbcm5719-llvm-a63048e4fd2936e600a1268651a4f2ad981309d0.tar.gz
bcm5719-llvm-a63048e4fd2936e600a1268651a4f2ad981309d0.zip
[OPENMP] Codegen for 'copyprivate' clause ('single' directive).
If there is at least one 'copyprivate' clause is associated with the single directive, the following code is generated: ``` i32 did_it = 0; \\ for 'copyprivate' clause if(__kmpc_single(ident_t *, gtid)) { SingleOpGen(); __kmpc_end_single(ident_t *, gtid); did_it = 1; \\ for 'copyprivate' clause } <copyprivate_list>[0] = &var0; ... <copyprivate_list>[n] = &varn; call __kmpc_copyprivate(ident_t *, gtid, <copyprivate_list_size>, <copyprivate_list>, <copy_func>, did_it); ... void<copy_func>(void *LHSArg, void *RHSArg) { Dst = (void * [n])(LHSArg); Src = (void * [n])(RHSArg); Dst[0] = Src[0]; ... Dst[n] = Src[n]; } ``` All list items from all 'copyprivate' clauses are gathered into single <copyprivate list> (<copyprivate_list_size> is a size in bytes of this list) and <copy_func> is used to propagate values of private or threadprivate variables from the 'single' region to other implicit threads from outer 'parallel' region. Differential Revision: http://reviews.llvm.org/D8410 llvm-svn: 232932
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp143
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h10
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp31
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h5
4 files changed, 184 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 25755838013..ef2d2147aca 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -580,6 +580,21 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
break;
}
+ case OMPRTL__kmpc_copyprivate: {
+ // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+ // kmp_int32 didit);
+ llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+ auto *CpyFnTy =
+ llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
+ CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
+ break;
+ }
}
return RTLFn;
}
@@ -965,19 +980,107 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
}
+static llvm::Value *emitCopyprivateCopyFunction(
+ CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> SrcExprs,
+ ArrayRef<const Expr *> DstExprs, ArrayRef<const Expr *> AssignmentOps) {
+ auto &C = CGM.getContext();
+ // void copy_func(void *LHSArg, void *RHSArg);
+ FunctionArgList Args;
+ ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+ C.VoidPtrTy);
+ ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+ C.VoidPtrTy);
+ Args.push_back(&LHSArg);
+ Args.push_back(&RHSArg);
+ FunctionType::ExtInfo EI;
+ auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ C.VoidTy, Args, EI, /*isVariadic=*/false);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ ".omp.copyprivate.copy_func", &CGM.getModule());
+ CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
+ CodeGenFunction CGF(CGM);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+ // Dst = (void*[n])(LHSArg);
+ // Src = (void*[n])(RHSArg);
+ auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
+ CGF.PointerAlignInBytes),
+ ArgsType);
+ auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
+ CGF.PointerAlignInBytes),
+ ArgsType);
+ // *(Type0*)Dst[0] = *(Type0*)Src[0];
+ // *(Type1*)Dst[1] = *(Type1*)Src[1];
+ // ...
+ // *(Typen*)Dst[n] = *(Typen*)Src[n];
+ CodeGenFunction::OMPPrivateScope Scope(CGF);
+ for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
+ Scope.addPrivate(
+ cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
+ [&]() -> llvm::Value *{
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(RHS, I),
+ CGM.PointerAlignInBytes),
+ CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+ });
+ Scope.addPrivate(
+ cast<VarDecl>(cast<DeclRefExpr>(DstExprs[I])->getDecl()),
+ [&]() -> llvm::Value *{
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(CGF.Builder.CreateStructGEP(LHS, I),
+ CGM.PointerAlignInBytes),
+ CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
+ });
+ }
+ Scope.Privatize();
+ for (auto *E : AssignmentOps) {
+ CGF.EmitIgnoredExpr(E);
+ }
+ Scope.ForceCleanup();
+ CGF.FinishFunction();
+ return Fn;
+}
+
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
const std::function<void()> &SingleOpGen,
- SourceLocation Loc) {
+ SourceLocation Loc,
+ ArrayRef<const Expr *> CopyprivateVars,
+ ArrayRef<const Expr *> SrcExprs,
+ ArrayRef<const Expr *> DstExprs,
+ ArrayRef<const Expr *> AssignmentOps) {
+ assert(CopyprivateVars.size() == SrcExprs.size() &&
+ CopyprivateVars.size() == DstExprs.size() &&
+ CopyprivateVars.size() == AssignmentOps.size());
+ auto &C = CGM.getContext();
+ // int32 did_it = 0;
// if(__kmpc_single(ident_t *, gtid)) {
// SingleOpGen();
// __kmpc_end_single(ident_t *, gtid);
+ // did_it = 1;
// }
+ // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
+ // <copy_func>, did_it);
+
+ llvm::AllocaInst *DidIt = nullptr;
+ if (!CopyprivateVars.empty()) {
+ // int32 did_it = 0;
+ auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
+ DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
+ CGF.InitTempAlloca(DidIt, CGF.Builder.getInt32(0));
+ }
// Prepare arguments and build a call to __kmpc_single
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
auto *IsSingle =
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
emitIfStmt(CGF, IsSingle, [&]() -> void {
SingleOpGen();
+ if (DidIt) {
+ // did_it = 1;
+ CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
+ DidIt->getAlignment());
+ }
// Build a call to __kmpc_end_single.
// OpenMP [1.2.2 OpenMP Language Terminology]
// For C/C++, an executable statement, possibly compound, with a single
@@ -994,6 +1097,44 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
// fallthrough rather than pushing a normal cleanup for it.
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_end_single), Args);
});
+ // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
+ // <copy_func>, did_it);
+ if (DidIt) {
+ llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
+ auto CopyprivateArrayTy =
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ // Create a list of all private variables for copyprivate.
+ auto *CopyprivateList =
+ CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
+ for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
+ auto *Elem = CGF.Builder.CreateStructGEP(CopyprivateList, I);
+ CGF.Builder.CreateAlignedStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
+ Elem, CGM.PointerAlignInBytes);
+ }
+ // Build function that copies private values from single region to all other
+ // threads in the corresponding parallel region.
+ auto *CpyFn = emitCopyprivateCopyFunction(
+ CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
+ SrcExprs, DstExprs, AssignmentOps);
+ auto *BufSize = CGF.Builder.getInt32(
+ C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
+ auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
+ CGF.VoidPtrTy);
+ auto *DidItVal =
+ CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
+ llvm::Value *Args[] = {
+ emitUpdateLocation(CGF, Loc), // ident_t *<loc>
+ getThreadID(CGF, Loc), // i32 <gtid>
+ BufSize, // i32 <buf_size>
+ CL, // void *<copyprivate list>
+ CpyFn, // void (*) (void *, void *) <copy_func>
+ DidItVal // i32 did_it
+ };
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
+ }
}
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index a3f957045d7..f8849e627c8 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -96,6 +96,10 @@ class CGOpenMPRuntime {
// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
// new_task);
OMPRTL__kmpc_omp_task,
+ // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
+ // kmp_int32 didit);
+ OMPRTL__kmpc_copyprivate,
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -348,7 +352,11 @@ public:
/// single region.
virtual void emitSingleRegion(CodeGenFunction &CGF,
const std::function<void()> &SingleOpGen,
- SourceLocation Loc);
+ SourceLocation Loc,
+ ArrayRef<const Expr *> CopyprivateVars,
+ ArrayRef<const Expr *> SrcExprs,
+ ArrayRef<const Expr *> DstExprs,
+ ArrayRef<const Expr *> AssignmentOps);
/// \brief Emits explicit barrier for OpenMP threads.
/// \param IsExplicit true, if it is explicitly specified barrier.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 42e6a841cdf..9af74749f17 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -882,7 +882,7 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
InlinedOpenMPRegionScopeRAII Region(*this, S);
EmitStmt(Stmt);
EnsureInsertPoint();
- }, S.getLocStart());
+ }, S.getLocStart(), llvm::None, llvm::None, llvm::None, llvm::None);
}
// Emit an implicit barrier at the end.
@@ -898,11 +898,38 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
}
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
+ llvm::SmallVector<const Expr *, 8> CopyprivateVars;
+ llvm::SmallVector<const Expr *, 8> SrcExprs;
+ llvm::SmallVector<const Expr *, 8> DstExprs;
+ llvm::SmallVector<const Expr *, 8> AssignmentOps;
+ // Check if there are any 'copyprivate' clauses associated with this 'single'
+ // construct.
+ auto CopyprivateFilter = [](const OMPClause *C) -> bool {
+ return C->getClauseKind() == OMPC_copyprivate;
+ };
+ // Build a list of copyprivate variables along with helper expressions
+ // (<source>, <destination>, <destination>=<source> expressions)
+ typedef OMPExecutableDirective::filtered_clause_iterator<decltype(
+ CopyprivateFilter)> CopyprivateIter;
+ for (CopyprivateIter I(S.clauses(), CopyprivateFilter); I; ++I) {
+ auto *C = cast<OMPCopyprivateClause>(*I);
+ CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
+ SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
+ DstExprs.append(C->destination_exprs().begin(),
+ C->destination_exprs().end());
+ AssignmentOps.append(C->assignment_ops().begin(),
+ C->assignment_ops().end());
+ }
+ // Emit code for 'single' region along with 'copyprivate' clauses
CGM.getOpenMPRuntime().emitSingleRegion(*this, [&]() -> void {
InlinedOpenMPRegionScopeRAII Region(*this, S);
EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
EnsureInsertPoint();
- }, S.getLocStart());
+ }, S.getLocStart(), CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
+ // Emit an implicit barrier at the end.
+ if (!S.getSingleClause(OMPC_nowait))
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getLocStart(),
+ /*IsExplicit=*/false);
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 474668abf89..151eb7e48d9 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -604,7 +604,10 @@ public:
}
/// \brief Exit scope - all the mapped variables are restored.
- ~OMPPrivateScope() { ForceCleanup(); }
+ ~OMPPrivateScope() {
+ if (PerformCleanup)
+ ForceCleanup();
+ }
};
/// \brief Takes the old cleanup stack size and emits the cleanup blocks
OpenPOWER on IntegriCloud