summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorAlexey Bataev <a.bataev@hotmail.com>2015-04-10 10:43:45 +0000
committerAlexey Bataev <a.bataev@hotmail.com>2015-04-10 10:43:45 +0000
commit794ba0dcb7baf17371445b788bf245441d1bbc3d (patch)
tree46c092f1660597a696f87baa74067755f42514bb /clang/lib/CodeGen
parent8ad3627e194c9da8f748df8abaa4c0e469a953a8 (diff)
downloadbcm5719-llvm-794ba0dcb7baf17371445b788bf245441d1bbc3d.tar.gz
bcm5719-llvm-794ba0dcb7baf17371445b788bf245441d1bbc3d.zip
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit a code for reduction clause. Next code should be emitted for reductions: static kmp_critical_name lock = { 0 }; void reduce_func(void *lhs[<n>], void *rhs[<n>]) { ... *(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]); ... } ... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]}; switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) { case 1: ... <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); ... __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); break; case 2: ... Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); ... break; default: ; } Reduction variables are a kind of a private variables, they have private copies, but initial values are chosen in accordance with the reduction operation. Differential Revision: http://reviews.llvm.org/D8915 llvm-svn: 234583
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp355
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h58
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp281
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h29
4 files changed, 592 insertions, 131 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 2b9fbd66613..bb00a0cd375 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -657,6 +657,62 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
break;
}
+ case OMPRTL__kmpc_reduce: {
+ // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
+ // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
+ llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+ auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
+ CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
+ llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
+ break;
+ }
+ case OMPRTL__kmpc_reduce_nowait: {
+ // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
+ // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
+ // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
+ // *lck);
+ llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
+ auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
+ /*isVarArg=*/false);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
+ CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
+ llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
+ break;
+ }
+ case OMPRTL__kmpc_end_reduce: {
+ // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *lck);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
+ break;
+ }
+ case OMPRTL__kmpc_end_reduce_nowait: {
+ // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *lck);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), CGM.Int32Ty,
+ llvm::PointerType::getUnqual(KmpCriticalNameTy)};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn =
+ CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
+ break;
+ }
}
return RTLFn;
}
@@ -998,11 +1054,11 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
getCriticalRegionLock(CriticalName)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args);
- emitInlinedDirective(CGF, CriticalOpGen);
// Build a call to __kmpc_end_critical
CGF.EHStack.pushCleanup<CallEndCleanup>(
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical),
llvm::makeArrayRef(Args));
+ emitInlinedDirective(CGF, CriticalOpGen);
}
}
@@ -1037,26 +1093,10 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args);
emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void {
CodeGenFunction::RunCleanupsScope Scope(CGF);
- MasterOpGen(CGF);
- // Build a call to __kmpc_end_master.
- // OpenMP [1.2.2 OpenMP Language Terminology]
- // For C/C++, an executable statement, possibly compound, with a single
- // entry at the top and a single exit at the bottom, or an OpenMP
- // construct.
- // * Access to the structured block must not be the result of a branch.
- // * The point of exit cannot be a branch out of the structured block.
- // * The point of entry must not be a call to setjmp().
- // * longjmp() and throw() must not violate the entry/exit criteria.
- // * An expression statement, iteration statement, selection statement, or
- // try block is considered to be a structured block if the corresponding
- // compound statement obtained by enclosing it in { and } would be a
- // structured block.
- // It is analyzed in Sema, so we can just call __kmpc_end_master() on
- // fallthrough rather than pushing a normal cleanup for it.
- // Build a call to __kmpc_end_critical
CGF.EHStack.pushCleanup<CallEndCleanup>(
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master),
llvm::makeArrayRef(Args));
+ MasterOpGen(CGF);
});
}
@@ -1167,29 +1207,15 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args);
emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void {
CodeGenFunction::RunCleanupsScope Scope(CGF);
+ CGF.EHStack.pushCleanup<CallEndCleanup>(
+ NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
+ llvm::makeArrayRef(Args));
SingleOpGen(CGF);
if (DidIt) {
// did_it = 1;
CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
DidIt->getAlignment());
}
- // Build a call to __kmpc_end_single.
- // OpenMP [1.2.2 OpenMP Language Terminology]
- // For C/C++, an executable statement, possibly compound, with a single
- // entry at the top and a single exit at the bottom, or an OpenMP construct.
- // * Access to the structured block must not be the result of a branch.
- // * The point of exit cannot be a branch out of the structured block.
- // * The point of entry must not be a call to setjmp().
- // * longjmp() and throw() must not violate the entry/exit criteria.
- // * An expression statement, iteration statement, selection statement, or
- // try block is considered to be a structured block if the corresponding
- // compound statement obtained by enclosing it in { and } would be a
- // structured block.
- // It is analyzed in Sema, so we can just call __kmpc_end_single() on
- // fallthrough rather than pushing a normal cleanup for it.
- CGF.EHStack.pushCleanup<CallEndCleanup>(
- NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
- llvm::makeArrayRef(Args));
});
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
// <copy_func>, did_it);
@@ -1615,6 +1641,265 @@ void CGOpenMPRuntime::emitTaskCall(
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
}
+static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
+ llvm::Type *ArgsType,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps) {
+ auto &C = CGM.getContext();
+
+ // void reduction_func(void *LHSArg, void *RHSArg);
+ FunctionArgList Args;
+ ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+ C.VoidPtrTy);
+ ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr,
+ C.VoidPtrTy);
+ Args.push_back(&LHSArg);
+ Args.push_back(&RHSArg);
+ FunctionType::ExtInfo EI;
+ auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration(
+ C.VoidTy, Args, EI, /*isVariadic=*/false);
+ auto *Fn = llvm::Function::Create(
+ CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
+ ".omp.reduction.reduction_func", &CGM.getModule());
+ CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn);
+ CodeGenFunction CGF(CGM);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
+
+ // Dst = (void*[n])(LHSArg);
+ // Src = (void*[n])(RHSArg);
+ auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
+ CGF.PointerAlignInBytes),
+ ArgsType);
+ auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
+ CGF.PointerAlignInBytes),
+ ArgsType);
+
+ // ...
+ // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
+ // ...
+ CodeGenFunction::OMPPrivateScope Scope(CGF);
+ for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
+ Scope.addPrivate(
+ cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
+ [&]() -> llvm::Value *{
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(
+ CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
+ CGM.PointerAlignInBytes),
+ CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
+ });
+ Scope.addPrivate(
+ cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
+ [&]() -> llvm::Value *{
+ return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.Builder.CreateAlignedLoad(
+ CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
+ CGM.PointerAlignInBytes),
+ CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
+ });
+ }
+ Scope.Privatize();
+ for (auto *E : ReductionOps) {
+ CGF.EmitIgnoredExpr(E);
+ }
+ Scope.ForceCleanup();
+ CGF.FinishFunction();
+ return Fn;
+}
+
+void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps,
+ bool WithNowait) {
+ // Next code should be emitted for reduction:
+ //
+ // static kmp_critical_name lock = { 0 };
+ //
+ // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
+ // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
+ // ...
+ // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
+ // *(Type<n>-1*)rhs[<n>-1]);
+ // }
+ //
+ // ...
+ // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+ // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+ // RedList, reduce_func, &<lock>)) {
+ // case 1:
+ // ...
+ // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+ // ...
+ // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+ // break;
+ // case 2:
+ // ...
+ // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+ // ...
+ // break;
+ // default:;
+ // }
+
+ auto &C = CGM.getContext();
+
+ // 1. Build a list of reduction variables.
+ // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
+ llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size());
+ QualType ReductionArrayTy =
+ C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
+ /*IndexTypeQuals=*/0);
+ auto *ReductionList =
+ CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
+ for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
+ auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
+ CGF.Builder.CreateAlignedStore(
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
+ Elem, CGM.PointerAlignInBytes);
+ }
+
+ // 2. Emit reduce_func().
+ auto *ReductionFn = emitReductionFunction(
+ CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs,
+ RHSExprs, ReductionOps);
+
+ // 3. Create static kmp_critical_name lock = { 0 };
+ auto *Lock = getCriticalRegionLock(".reduction");
+
+ // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+ // RedList, reduce_func, &<lock>);
+ auto *IdentTLoc = emitUpdateLocation(
+ CGF, Loc,
+ static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE));
+ auto *ThreadId = getThreadID(CGF, Loc);
+ auto *ReductionArrayTySize = llvm::ConstantInt::get(
+ CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
+ auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
+ CGF.VoidPtrTy);
+ llvm::Value *Args[] = {
+ IdentTLoc, // ident_t *<loc>
+ ThreadId, // i32 <gtid>
+ CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
+ ReductionArrayTySize, // size_type sizeof(RedList)
+ RL, // void *RedList
+ ReductionFn, // void (*) (void *, void *) <reduce_func>
+ Lock // kmp_critical_name *&<lock>
+ };
+ auto Res = CGF.EmitRuntimeCall(
+ createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
+ : OMPRTL__kmpc_reduce),
+ Args);
+
+ // 5. Build switch(res)
+ auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
+ auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
+
+ // 6. Build case 1:
+ // ...
+ // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+ // ...
+ // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+ // break;
+ auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
+ SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
+ CGF.EmitBlock(Case1BB);
+
+ {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+ llvm::Value *EndArgs[] = {
+ IdentTLoc, // ident_t *<loc>
+ ThreadId, // i32 <gtid>
+ Lock // kmp_critical_name *&<lock>
+ };
+ CGF.EHStack.pushCleanup<CallEndCleanup>(
+ NormalAndEHCleanup,
+ createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
+ : OMPRTL__kmpc_end_reduce),
+ llvm::makeArrayRef(EndArgs));
+ for (auto *E : ReductionOps) {
+ CGF.EmitIgnoredExpr(E);
+ }
+ }
+
+ CGF.EmitBranch(DefaultBB);
+
+ // 7. Build case 2:
+ // ...
+ // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+ // ...
+ // break;
+ auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
+ SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
+ CGF.EmitBlock(Case2BB);
+
+ {
+ CodeGenFunction::RunCleanupsScope Scope(CGF);
+ auto I = LHSExprs.begin();
+ for (auto *E : ReductionOps) {
+ const Expr *XExpr = nullptr;
+ const Expr *EExpr = nullptr;
+ const Expr *UpExpr = nullptr;
+ BinaryOperatorKind BO = BO_Comma;
+ // Try to emit update expression as a simple atomic.
+ if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) {
+ // If this is a conditional operator, analyze it's condition for
+ // min/max reduction operator.
+ E = ACO->getCond();
+ }
+ if (auto *BO = dyn_cast<BinaryOperator>(E)) {
+ if (BO->getOpcode() == BO_Assign) {
+ XExpr = BO->getLHS();
+ UpExpr = BO->getRHS();
+ }
+ }
+ // Analyze RHS part of the whole expression.
+ if (UpExpr) {
+ if (auto *BORHS =
+ dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) {
+ EExpr = BORHS->getRHS();
+ BO = BORHS->getOpcode();
+ }
+ }
+ if (XExpr) {
+ auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
+ LValue X = CGF.EmitLValue(XExpr);
+ RValue E;
+ if (EExpr)
+ E = CGF.EmitAnyExpr(EExpr);
+ CGF.EmitOMPAtomicSimpleUpdateExpr(
+ X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc,
+ [&CGF, UpExpr, VD](RValue XRValue) {
+ CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
+ PrivateScope.addPrivate(
+ VD, [&CGF, VD, XRValue]() -> llvm::Value *{
+ auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
+ CGF.EmitStoreThroughLValue(
+ XRValue,
+ CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
+ return LHSTemp;
+ });
+ (void)PrivateScope.Privatize();
+ return CGF.EmitAnyExpr(UpExpr);
+ });
+ } else {
+ // Emit as a critical region.
+ emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) {
+ CGF.EmitIgnoredExpr(E);
+ }, Loc);
+ }
+ ++I;
+ }
+ }
+
+ CGF.EmitBranch(DefaultBB);
+ CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
+}
+
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
const RegionCodeGenTy &CodeGen) {
InlinedOpenMPRegionRAII Region(CGF, CodeGen);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 1b91d02e3b1..f6326171a7b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -103,6 +103,21 @@ private:
// kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
// kmp_int32 didit);
OMPRTL__kmpc_copyprivate,
+ // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
+ // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
+ // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
+ OMPRTL__kmpc_reduce,
+ // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
+ // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
+ // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
+ // *lck);
+ OMPRTL__kmpc_reduce_nowait,
+ // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *lck);
+ OMPRTL__kmpc_end_reduce,
+ // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
+ // kmp_critical_name *lck);
+ OMPRTL__kmpc_end_reduce_nowait,
};
/// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -511,11 +526,54 @@ public:
llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
llvm::Value *TaskFunction, QualType SharedsTy,
llvm::Value *Shareds);
+
/// \brief Emit code for the directive that does not require outlining.
///
/// \param CodeGen Code generation sequence for the \a D directive.
virtual void emitInlinedDirective(CodeGenFunction &CGF,
const RegionCodeGenTy &CodeGen);
+ /// \brief Emit a code for reduction clause. Next code should be emitted for
+ /// reduction:
+ /// \code
+ ///
+ /// static kmp_critical_name lock = { 0 };
+ ///
+ /// void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
+ /// ...
+ /// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
+ /// ...
+ /// }
+ ///
+ /// ...
+ /// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
+ /// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
+ /// RedList, reduce_func, &<lock>)) {
+ /// case 1:
+ /// ...
+ /// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
+ /// ...
+ /// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
+ /// break;
+ /// case 2:
+ /// ...
+ /// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
+ /// ...
+ /// break;
+ /// default:;
+ /// }
+ /// \endcode
+ ///
+ /// \param LHSExprs List of LHS in \a ReductionOps reduction operations.
+ /// \param RHSExprs List of RHS in \a ReductionOps reduction operations.
+ /// \param ReductionOps List of reduction operations in form 'LHS binop RHS'
+ /// or 'operator binop(LHS, RHS)'.
+ /// \param WithNowait true if parent directive has also nowait clause, false
+ /// otherwise.
+ virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
+ ArrayRef<const Expr *> LHSExprs,
+ ArrayRef<const Expr *> RHSExprs,
+ ArrayRef<const Expr *> ReductionOps,
+ bool WithNowait);
};
} // namespace CodeGen
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 02a02de2cc9..f46999084b3 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -209,6 +209,73 @@ void CodeGenFunction::EmitOMPPrivateClause(
}
}
+void CodeGenFunction::EmitOMPReductionClauseInit(
+ const OMPExecutableDirective &D,
+ CodeGenFunction::OMPPrivateScope &PrivateScope) {
+ auto ReductionFilter = [](const OMPClause *C) -> bool {
+ return C->getClauseKind() == OMPC_reduction;
+ };
+ for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+ ReductionFilter)> I(D.clauses(), ReductionFilter);
+ I; ++I) {
+ auto *C = cast<OMPReductionClause>(*I);
+ auto ILHS = C->lhs_exprs().begin();
+ auto IRHS = C->rhs_exprs().begin();
+ for (auto IRef : C->varlists()) {
+ auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+ auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ // Store the address of the original variable associated with the LHS
+ // implicit variable.
+ PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{
+ DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
+ CapturedStmtInfo->lookup(OrigVD) != nullptr,
+ IRef->getType(), VK_LValue, IRef->getExprLoc());
+ return EmitLValue(&DRE).getAddress();
+ });
+ // Emit reduction copy.
+ bool IsRegistered =
+ PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{
+ // Emit private VarDecl with reduction init.
+ EmitDecl(*PrivateVD);
+ return GetAddrOfLocalVar(PrivateVD);
+ });
+ assert(IsRegistered && "private var already registered as private");
+ // Silence the warning about unused variable.
+ (void)IsRegistered;
+ ++ILHS, ++IRHS;
+ }
+ }
+}
+
+void CodeGenFunction::EmitOMPReductionClauseFinal(
+ const OMPExecutableDirective &D) {
+ llvm::SmallVector<const Expr *, 8> LHSExprs;
+ llvm::SmallVector<const Expr *, 8> RHSExprs;
+ llvm::SmallVector<const Expr *, 8> ReductionOps;
+ auto ReductionFilter = [](const OMPClause *C) -> bool {
+ return C->getClauseKind() == OMPC_reduction;
+ };
+ bool HasAtLeastOneReduction = false;
+ for (OMPExecutableDirective::filtered_clause_iterator<decltype(
+ ReductionFilter)> I(D.clauses(), ReductionFilter);
+ I; ++I) {
+ HasAtLeastOneReduction = true;
+ auto *C = cast<OMPReductionClause>(*I);
+ LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ }
+ if (HasAtLeastOneReduction) {
+ // Emit nowait reduction if nowait clause is present or directive is a
+ // parallel directive (it always has implicit barrier).
+ CGM.getOpenMPRuntime().emitReduction(
+ *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps,
+ D.getSingleClause(OMPC_nowait) ||
+ isOpenMPParallelDirective(D.getDirectiveKind()));
+ }
+}
+
/// \brief Emits code for OpenMP parallel directive in the parallel region.
static void emitOMPParallelCall(CodeGenFunction &CGF,
const OMPExecutableDirective &S,
@@ -253,11 +320,13 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPPrivateClause(S, PrivateScope);
CGF.EmitOMPFirstprivateClause(S, PrivateScope);
+ CGF.EmitOMPReductionClauseInit(S, PrivateScope);
if (PrivateScope.Privatize())
// Emit implicit barrier to synchronize threads and avoid data races.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
OMPD_unknown);
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
+ CGF.EmitOMPReductionClauseFinal(S);
// Emit implicit barrier at the end of the 'parallel' directive.
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(),
OMPD_unknown);
@@ -1155,73 +1224,113 @@ static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
}
-static Optional<llvm::AtomicRMWInst::BinOp>
-getCompatibleAtomicRMWBinOp(ASTContext &Context, BinaryOperatorKind Op,
- bool IsXLHSInRHSPart, LValue XLValue,
- RValue ExprRValue) {
- Optional<llvm::AtomicRMWInst::BinOp> RMWOp;
- // Allow atomicrmw only if 'x' and 'expr' are integer values, lvalue for 'x'
+bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update,
+ BinaryOperatorKind BO, llvm::AtomicOrdering AO,
+ bool IsXLHSInRHSPart) {
+ auto &Context = CGF.CGM.getContext();
+ // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x'
// expression is simple and atomic is allowed for the given type for the
// target platform.
- if (ExprRValue.isScalar() &&
- ExprRValue.getScalarVal()->getType()->isIntegerTy() &&
- XLValue.isSimple() &&
- (isa<llvm::ConstantInt>(ExprRValue.getScalarVal()) ||
- (ExprRValue.getScalarVal()->getType() ==
- XLValue.getAddress()->getType()->getPointerElementType())) &&
- Context.getTargetInfo().hasBuiltinAtomic(
- Context.getTypeSize(XLValue.getType()),
- Context.toBits(XLValue.getAlignment()))) {
- switch (Op) {
- case BO_Add:
- RMWOp = llvm::AtomicRMWInst::Add;
- break;
- case BO_Sub:
- if (IsXLHSInRHSPart) {
- RMWOp = llvm::AtomicRMWInst::Sub;
- }
- break;
- case BO_And:
- RMWOp = llvm::AtomicRMWInst::And;
- break;
- case BO_Or:
- RMWOp = llvm::AtomicRMWInst::Or;
- break;
- case BO_Xor:
- RMWOp = llvm::AtomicRMWInst::Xor;
- break;
- case BO_Mul:
- case BO_Div:
- case BO_Rem:
- case BO_Shl:
- case BO_Shr:
- break;
- case BO_PtrMemD:
- case BO_PtrMemI:
- case BO_LT:
- case BO_GT:
- case BO_LE:
- case BO_GE:
- case BO_EQ:
- case BO_NE:
- case BO_LAnd:
- case BO_LOr:
- case BO_Assign:
- case BO_MulAssign:
- case BO_DivAssign:
- case BO_RemAssign:
- case BO_AddAssign:
- case BO_SubAssign:
- case BO_ShlAssign:
- case BO_ShrAssign:
- case BO_AndAssign:
- case BO_XorAssign:
- case BO_OrAssign:
- case BO_Comma:
- llvm_unreachable("Unexpected binary operation in 'atomic update'.");
+ if (BO == BO_Comma || !Update.isScalar() ||
+ !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() ||
+ (!isa<llvm::ConstantInt>(Update.getScalarVal()) &&
+ (Update.getScalarVal()->getType() !=
+ X.getAddress()->getType()->getPointerElementType())) ||
+ !Context.getTargetInfo().hasBuiltinAtomic(
+ Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment())))
+ return false;
+
+ llvm::AtomicRMWInst::BinOp RMWOp;
+ switch (BO) {
+ case BO_Add:
+ RMWOp = llvm::AtomicRMWInst::Add;
+ break;
+ case BO_Sub:
+ if (!IsXLHSInRHSPart)
+ return false;
+ RMWOp = llvm::AtomicRMWInst::Sub;
+ break;
+ case BO_And:
+ RMWOp = llvm::AtomicRMWInst::And;
+ break;
+ case BO_Or:
+ RMWOp = llvm::AtomicRMWInst::Or;
+ break;
+ case BO_Xor:
+ RMWOp = llvm::AtomicRMWInst::Xor;
+ break;
+ case BO_LT:
+ RMWOp = X.getType()->hasSignedIntegerRepresentation()
+ ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
+ : llvm::AtomicRMWInst::Max)
+ : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
+ : llvm::AtomicRMWInst::UMax);
+ break;
+ case BO_GT:
+ RMWOp = X.getType()->hasSignedIntegerRepresentation()
+ ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
+ : llvm::AtomicRMWInst::Min)
+ : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
+ : llvm::AtomicRMWInst::UMin);
+ break;
+ case BO_Mul:
+ case BO_Div:
+ case BO_Rem:
+ case BO_Shl:
+ case BO_Shr:
+ case BO_LAnd:
+ case BO_LOr:
+ return false;
+ case BO_PtrMemD:
+ case BO_PtrMemI:
+ case BO_LE:
+ case BO_GE:
+ case BO_EQ:
+ case BO_NE:
+ case BO_Assign:
+ case BO_AddAssign:
+ case BO_SubAssign:
+ case BO_AndAssign:
+ case BO_OrAssign:
+ case BO_XorAssign:
+ case BO_MulAssign:
+ case BO_DivAssign:
+ case BO_RemAssign:
+ case BO_ShlAssign:
+ case BO_ShrAssign:
+ case BO_Comma:
+ llvm_unreachable("Unsupported atomic update operation");
+ }
+ auto *UpdateVal = Update.getScalarVal();
+ if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) {
+ UpdateVal = CGF.Builder.CreateIntCast(
+ IC, X.getAddress()->getType()->getPointerElementType(),
+ X.getType()->hasSignedIntegerRepresentation());
+ }
+ CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+ return true;
+}
+
+void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
+ LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
+ llvm::AtomicOrdering AO, SourceLocation Loc,
+ const llvm::function_ref<RValue(RValue)> &CommonGen) {
+ // Update expressions are allowed to have the following forms:
+ // x binop= expr; -> xrval + expr;
+ // x++, ++x -> xrval + 1;
+ // x--, --x -> xrval - 1;
+ // x = x binop expr; -> xrval binop expr
+ // x = expr Op x; - > expr binop xrval;
+ if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) {
+ if (X.isGlobalReg()) {
+ // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
+ // 'xrval'.
+ EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X);
+ } else {
+ // Perform compare-and-swap procedure.
+ EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified());
}
}
- return std::move(RMWOp);
}
static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
@@ -1237,42 +1346,22 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
// x--, --x -> xrval - 1;
// x = x binop expr; -> xrval binop expr
// x = expr Op x; - > expr binop xrval;
- assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
+ assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- const auto &Op =
- getCompatibleAtomicRMWBinOp(CGF.CGM.getContext(), BOUE->getOpcode(),
- IsXLHSInRHSPart, XLValue, ExprRValue);
auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic;
- if (Op) {
- auto *ExprVal = ExprRValue.getScalarVal();
- if (auto *IC = dyn_cast<llvm::ConstantInt>(ExprVal)) {
- ExprVal = CGF.Builder.CreateIntCast(
- IC, XLValue.getAddress()->getType()->getPointerElementType(),
- XLValue.getType()->hasSignedIntegerRepresentation());
- }
- CGF.Builder.CreateAtomicRMW(*Op, XLValue.getAddress(), ExprVal, AO);
- } else {
- auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
- auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
- CodeGenFunction::OpaqueValueMapping MapExpr(
- CGF, IsXLHSInRHSPart ? RHS : LHS, ExprRValue);
- auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
- if (XLValue.isGlobalReg()) {
- // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
- // 'xrval'.
- CodeGenFunction::OpaqueValueMapping MapX(
- CGF, XRValExpr, CGF.EmitLoadOfLValue(XLValue, Loc));
- CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(UE), XLValue);
- } else {
- // Perform compare-and-swap procedure.
- CGF.EmitAtomicUpdate(
- XLValue, AO, [&CGF, &UE, &XRValExpr](RValue XRVal) -> RValue {
- CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRVal);
- return CGF.EmitAnyExpr(UE);
- }, /*IsVolatile=*/false);
- }
- }
+ auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
+ auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
+ auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
+ auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
+ auto Gen =
+ [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue {
+ CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
+ CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
+ return CGF.EmitAnyExpr(UE);
+ };
+ CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(),
+ IsXLHSInRHSPart, AO, Loc, Gen);
// OpenMP, 2.12.6, atomic Construct
// Any atomic construct with a seq_cst clause forces the atomically
// performed operation to include an implicit flush operation without a
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 0ab1e5ea8b1..7cf1521fcd0 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2032,10 +2032,39 @@ public:
void EmitOMPAggregateAssign(LValue OriginalAddr, llvm::Value *PrivateAddr,
const Expr *AssignExpr, QualType Type,
const VarDecl *VDInit);
+ /// \brief Emit atomic update code for constructs: \a X = \a X \a BO \a E or
+ /// \a X = \a E \a BO \a E.
+ ///
+ /// \param X Value to be updated.
+ /// \param E Update value.
+ /// \param BO Binary operation for update operation.
+ /// \param IsXLHSInRHSPart true if \a X is LHS in RHS part of the update
+ /// expression, false otherwise.
+ /// \param AO Atomic ordering of the generated atomic instructions.
+ /// \param CommonGen Code generator for complex expressions that cannot be
+ /// expressed through atomicrmw instruction.
+ void EmitOMPAtomicSimpleUpdateExpr(
+ LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
+ llvm::AtomicOrdering AO, SourceLocation Loc,
+ const llvm::function_ref<RValue(RValue)> &CommonGen);
void EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
void EmitOMPPrivateClause(const OMPExecutableDirective &D,
OMPPrivateScope &PrivateScope);
+ /// \brief Emit initial code for reduction variables. Creates reduction copies
+ /// and initializes them with the values according to OpenMP standard.
+ ///
+ /// \param D Directive (possibly) with the 'reduction' clause.
+ /// \param PrivateScope Private scope for capturing reduction variables for
+ /// proper codegen in internal captured statement.
+ ///
+ void EmitOMPReductionClauseInit(const OMPExecutableDirective &D,
+ OMPPrivateScope &PrivateScope);
+ /// \brief Emit final update of reduction values to original variables at
+ /// the end of the directive.
+ ///
+ /// \param D Directive that has at least one 'reduction' directives.
+ void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D);
void EmitOMPParallelDirective(const OMPParallelDirective &S);
void EmitOMPSimdDirective(const OMPSimdDirective &S);
OpenPOWER on IntegriCloud