diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-04-10 10:43:45 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-04-10 10:43:45 +0000 |
commit | 794ba0dcb7baf17371445b788bf245441d1bbc3d (patch) | |
tree | 46c092f1660597a696f87baa74067755f42514bb /clang/lib/CodeGen | |
parent | 8ad3627e194c9da8f748df8abaa4c0e469a953a8 (diff) | |
download | bcm5719-llvm-794ba0dcb7baf17371445b788bf245441d1bbc3d.tar.gz bcm5719-llvm-794ba0dcb7baf17371445b788bf245441d1bbc3d.zip |
[OPENMP] Codegen for 'reduction' clause in 'parallel' directive.
Emit code for the 'reduction' clause. The following code should be emitted for reductions:
static kmp_critical_name lock = { 0 };
void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
...
*(Type<i> *)lhs[i] = RedOp<i>(*(Type<i> *)lhs[i], *(Type<i> *)rhs[i]);
...
}
... void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n> - 1]};
switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), RedList, reduce_func, &<lock>)) {
case 1:
...
<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
...
__kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
break;
case 2:
...
Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
...
break;
default:
;
}
Reduction variables are a kind of private variable: they have private copies, but their initial values are chosen in accordance with the reduction operation.
Differential Revision: http://reviews.llvm.org/D8915
llvm-svn: 234583
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 355 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 58 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 281 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 29 |
4 files changed, 592 insertions, 131 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 2b9fbd66613..bb00a0cd375 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -657,6 +657,62 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate"); break; } + case OMPRTL__kmpc_reduce: { + // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void + // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); + llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, + CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), + llvm::PointerType::getUnqual(KmpCriticalNameTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce"); + break; + } + case OMPRTL__kmpc_reduce_nowait: { + // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, + // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name + // *lck); + llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams, + /*isVarArg=*/false); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy, + CGM.VoidPtrTy, ReduceFnTy->getPointerTo(), + llvm::PointerType::getUnqual(KmpCriticalNameTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait"); + break; + } + case 
OMPRTL__kmpc_end_reduce: { + // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(KmpCriticalNameTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce"); + break; + } + case OMPRTL__kmpc_end_reduce_nowait: { + // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + llvm::Type *TypeParams[] = { + getIdentTyPointerTy(), CGM.Int32Ty, + llvm::PointerType::getUnqual(KmpCriticalNameTy)}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = + CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait"); + break; + } } return RTLFn; } @@ -998,11 +1054,11 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - emitInlinedDirective(CGF, CriticalOpGen); // Build a call to __kmpc_end_critical CGF.EHStack.pushCleanup<CallEndCleanup>( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), llvm::makeArrayRef(Args)); + emitInlinedDirective(CGF, CriticalOpGen); } } @@ -1037,26 +1093,10 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); emitIfStmt(CGF, IsMaster, [&](CodeGenFunction &CGF) -> void { CodeGenFunction::RunCleanupsScope Scope(CGF); - MasterOpGen(CGF); - // Build a call to __kmpc_end_master. - // OpenMP [1.2.2 OpenMP Language Terminology] - // For C/C++, an executable statement, possibly compound, with a single - // entry at the top and a single exit at the bottom, or an OpenMP - // construct. 
- // * Access to the structured block must not be the result of a branch. - // * The point of exit cannot be a branch out of the structured block. - // * The point of entry must not be a call to setjmp(). - // * longjmp() and throw() must not violate the entry/exit criteria. - // * An expression statement, iteration statement, selection statement, or - // try block is considered to be a structured block if the corresponding - // compound statement obtained by enclosing it in { and } would be a - // structured block. - // It is analyzed in Sema, so we can just call __kmpc_end_master() on - // fallthrough rather than pushing a normal cleanup for it. - // Build a call to __kmpc_end_critical CGF.EHStack.pushCleanup<CallEndCleanup>( NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), llvm::makeArrayRef(Args)); + MasterOpGen(CGF); }); } @@ -1167,29 +1207,15 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); emitIfStmt(CGF, IsSingle, [&](CodeGenFunction &CGF) -> void { CodeGenFunction::RunCleanupsScope Scope(CGF); + CGF.EHStack.pushCleanup<CallEndCleanup>( + NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), + llvm::makeArrayRef(Args)); SingleOpGen(CGF); if (DidIt) { // did_it = 1; CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt, DidIt->getAlignment()); } - // Build a call to __kmpc_end_single. - // OpenMP [1.2.2 OpenMP Language Terminology] - // For C/C++, an executable statement, possibly compound, with a single - // entry at the top and a single exit at the bottom, or an OpenMP construct. - // * Access to the structured block must not be the result of a branch. - // * The point of exit cannot be a branch out of the structured block. - // * The point of entry must not be a call to setjmp(). - // * longjmp() and throw() must not violate the entry/exit criteria. 
- // * An expression statement, iteration statement, selection statement, or - // try block is considered to be a structured block if the corresponding - // compound statement obtained by enclosing it in { and } would be a - // structured block. - // It is analyzed in Sema, so we can just call __kmpc_end_single() on - // fallthrough rather than pushing a normal cleanup for it. - CGF.EHStack.pushCleanup<CallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), - llvm::makeArrayRef(Args)); }); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); @@ -1615,6 +1641,265 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } +static llvm::Value *emitReductionFunction(CodeGenModule &CGM, + llvm::Type *ArgsType, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps) { + auto &C = CGM.getContext(); + + // void reduction_func(void *LHSArg, void *RHSArg); + FunctionArgList Args; + ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, + C.VoidPtrTy); + ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, SourceLocation(), /*Id=*/nullptr, + C.VoidPtrTy); + Args.push_back(&LHSArg); + Args.push_back(&RHSArg); + FunctionType::ExtInfo EI; + auto &CGFI = CGM.getTypes().arrangeFreeFunctionDeclaration( + C.VoidTy, Args, EI, /*isVariadic=*/false); + auto *Fn = llvm::Function::Create( + CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, + ".omp.reduction.reduction_func", &CGM.getModule()); + CGM.SetLLVMFunctionAttributes(/*D=*/nullptr, CGFI, Fn); + CodeGenFunction CGF(CGM); + CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args); + + // Dst = (void*[n])(LHSArg); + // Src = (void*[n])(RHSArg); + auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg), + CGF.PointerAlignInBytes), + 
ArgsType); + auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg), + CGF.PointerAlignInBytes), + ArgsType); + + // ... + // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); + // ... + CodeGenFunction::OMPPrivateScope Scope(CGF); + for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) { + Scope.addPrivate( + cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()), + [&]() -> llvm::Value *{ + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad( + CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I), + CGM.PointerAlignInBytes), + CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType()))); + }); + Scope.addPrivate( + cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()), + [&]() -> llvm::Value *{ + return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.Builder.CreateAlignedLoad( + CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I), + CGM.PointerAlignInBytes), + CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType()))); + }); + } + Scope.Privatize(); + for (auto *E : ReductionOps) { + CGF.EmitIgnoredExpr(E); + } + Scope.ForceCleanup(); + CGF.FinishFunction(); + return Fn; +} + +void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, + bool WithNowait) { + // Next code should be emitted for reduction: + // + // static kmp_critical_name lock = { 0 }; + // + // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { + // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); + // ... + // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], + // *(Type<n>-1*)rhs[<n>-1]); + // } + // + // ... 
+ // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; + // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), + // RedList, reduce_func, &<lock>)) { + // case 1: + // ... + // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); + // ... + // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + // break; + // case 2: + // ... + // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); + // ... + // break; + // default:; + // } + + auto &C = CGM.getContext(); + + // 1. Build a list of reduction variables. + // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; + llvm::APInt ArraySize(/*unsigned int numBits=*/32, RHSExprs.size()); + QualType ReductionArrayTy = + C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal, + /*IndexTypeQuals=*/0); + auto *ReductionList = + CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); + for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) { + auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I); + CGF.Builder.CreateAlignedStore( + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy), + Elem, CGM.PointerAlignInBytes); + } + + // 2. Emit reduce_func(). + auto *ReductionFn = emitReductionFunction( + CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), LHSExprs, + RHSExprs, ReductionOps); + + // 3. Create static kmp_critical_name lock = { 0 }; + auto *Lock = getCriticalRegionLock(".reduction"); + + // 4. 
Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), + // RedList, reduce_func, &<lock>); + auto *IdentTLoc = emitUpdateLocation( + CGF, Loc, + static_cast<OpenMPLocationFlags>(OMP_IDENT_KMPC | OMP_ATOMIC_REDUCE)); + auto *ThreadId = getThreadID(CGF, Loc); + auto *ReductionArrayTySize = llvm::ConstantInt::get( + CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity()); + auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, + CGF.VoidPtrTy); + llvm::Value *Args[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> + ReductionArrayTySize, // size_type sizeof(RedList) + RL, // void *RedList + ReductionFn, // void (*) (void *, void *) <reduce_func> + Lock // kmp_critical_name *&<lock> + }; + auto Res = CGF.EmitRuntimeCall( + createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait + : OMPRTL__kmpc_reduce), + Args); + + // 5. Build switch(res) + auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); + auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); + + // 6. Build case 1: + // ... + // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); + // ... + // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + // break; + auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); + SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); + CGF.EmitBlock(Case1BB); + + { + CodeGenFunction::RunCleanupsScope Scope(CGF); + // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + CGF.EHStack.pushCleanup<CallEndCleanup>( + NormalAndEHCleanup, + createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait + : OMPRTL__kmpc_end_reduce), + llvm::makeArrayRef(EndArgs)); + for (auto *E : ReductionOps) { + CGF.EmitIgnoredExpr(E); + } + } + + CGF.EmitBranch(DefaultBB); + + // 7. 
Build case 2: + // ... + // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); + // ... + // break; + auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); + SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); + CGF.EmitBlock(Case2BB); + + { + CodeGenFunction::RunCleanupsScope Scope(CGF); + auto I = LHSExprs.begin(); + for (auto *E : ReductionOps) { + const Expr *XExpr = nullptr; + const Expr *EExpr = nullptr; + const Expr *UpExpr = nullptr; + BinaryOperatorKind BO = BO_Comma; + // Try to emit update expression as a simple atomic. + if (auto *ACO = dyn_cast<AbstractConditionalOperator>(E)) { + // If this is a conditional operator, analyze it's condition for + // min/max reduction operator. + E = ACO->getCond(); + } + if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (BO->getOpcode() == BO_Assign) { + XExpr = BO->getLHS(); + UpExpr = BO->getRHS(); + } + } + // Analyze RHS part of the whole expression. + if (UpExpr) { + if (auto *BORHS = + dyn_cast<BinaryOperator>(UpExpr->IgnoreParenImpCasts())) { + EExpr = BORHS->getRHS(); + BO = BORHS->getOpcode(); + } + } + if (XExpr) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()); + LValue X = CGF.EmitLValue(XExpr); + RValue E; + if (EExpr) + E = CGF.EmitAnyExpr(EExpr); + CGF.EmitOMPAtomicSimpleUpdateExpr( + X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, + [&CGF, UpExpr, VD](RValue XRValue) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue]() -> llvm::Value *{ + auto *LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.EmitStoreThroughLValue( + XRValue, + CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType())); + return LHSTemp; + }); + (void)PrivateScope.Privatize(); + return CGF.EmitAnyExpr(UpExpr); + }); + } else { + // Emit as a critical region. 
+ emitCriticalRegion(CGF, ".atomic_reduction", [E](CodeGenFunction &CGF) { + CGF.EmitIgnoredExpr(E); + }, Loc); + } + ++I; + } + } + + CGF.EmitBranch(DefaultBB); + CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); +} + void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen) { InlinedOpenMPRegionRAII Region(CGF, CodeGen); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 1b91d02e3b1..f6326171a7b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -103,6 +103,21 @@ private: // kmp_int32 cpy_size, void *cpy_data, void(*cpy_func)(void *, void *), // kmp_int32 didit); OMPRTL__kmpc_copyprivate, + // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void + // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck); + OMPRTL__kmpc_reduce, + // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 + // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data, + // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name + // *lck); + OMPRTL__kmpc_reduce_nowait, + // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce, + // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + // kmp_critical_name *lck); + OMPRTL__kmpc_end_reduce_nowait, }; /// \brief Values for bit flags used in the ident_t to describe the fields. @@ -511,11 +526,54 @@ public: llvm::PointerIntPair<llvm::Value *, 1, bool> Final, llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds); + /// \brief Emit code for the directive that does not require outlining. /// /// \param CodeGen Code generation sequence for the \a D directive. 
virtual void emitInlinedDirective(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen); + /// \brief Emit a code for reduction clause. Next code should be emitted for + /// reduction: + /// \code + /// + /// static kmp_critical_name lock = { 0 }; + /// + /// void reduce_func(void *lhs[<n>], void *rhs[<n>]) { + /// ... + /// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); + /// ... + /// } + /// + /// ... + /// void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; + /// switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), + /// RedList, reduce_func, &<lock>)) { + /// case 1: + /// ... + /// <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); + /// ... + /// __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + /// break; + /// case 2: + /// ... + /// Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); + /// ... + /// break; + /// default:; + /// } + /// \endcode + /// + /// \param LHSExprs List of LHS in \a ReductionOps reduction operations. + /// \param RHSExprs List of RHS in \a ReductionOps reduction operations. + /// \param ReductionOps List of reduction operations in form 'LHS binop RHS' + /// or 'operator binop(LHS, RHS)'. + /// \param WithNowait true if parent directive has also nowait clause, false + /// otherwise. 
+ virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, + ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, + ArrayRef<const Expr *> ReductionOps, + bool WithNowait); }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 02a02de2cc9..f46999084b3 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -209,6 +209,73 @@ void CodeGenFunction::EmitOMPPrivateClause( } } +void CodeGenFunction::EmitOMPReductionClauseInit( + const OMPExecutableDirective &D, + CodeGenFunction::OMPPrivateScope &PrivateScope) { + auto ReductionFilter = [](const OMPClause *C) -> bool { + return C->getClauseKind() == OMPC_reduction; + }; + for (OMPExecutableDirective::filtered_clause_iterator<decltype( + ReductionFilter)> I(D.clauses(), ReductionFilter); + I; ++I) { + auto *C = cast<OMPReductionClause>(*I); + auto ILHS = C->lhs_exprs().begin(); + auto IRHS = C->rhs_exprs().begin(); + for (auto IRef : C->varlists()) { + auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + // Store the address of the original variable associated with the LHS + // implicit variable. + PrivateScope.addPrivate(LHSVD, [this, OrigVD, IRef]() -> llvm::Value *{ + DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD), + CapturedStmtInfo->lookup(OrigVD) != nullptr, + IRef->getType(), VK_LValue, IRef->getExprLoc()); + return EmitLValue(&DRE).getAddress(); + }); + // Emit reduction copy. + bool IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, PrivateVD]() -> llvm::Value *{ + // Emit private VarDecl with reduction init. + EmitDecl(*PrivateVD); + return GetAddrOfLocalVar(PrivateVD); + }); + assert(IsRegistered && "private var already registered as private"); + // Silence the warning about unused variable. 
+ (void)IsRegistered; + ++ILHS, ++IRHS; + } + } +} + +void CodeGenFunction::EmitOMPReductionClauseFinal( + const OMPExecutableDirective &D) { + llvm::SmallVector<const Expr *, 8> LHSExprs; + llvm::SmallVector<const Expr *, 8> RHSExprs; + llvm::SmallVector<const Expr *, 8> ReductionOps; + auto ReductionFilter = [](const OMPClause *C) -> bool { + return C->getClauseKind() == OMPC_reduction; + }; + bool HasAtLeastOneReduction = false; + for (OMPExecutableDirective::filtered_clause_iterator<decltype( + ReductionFilter)> I(D.clauses(), ReductionFilter); + I; ++I) { + HasAtLeastOneReduction = true; + auto *C = cast<OMPReductionClause>(*I); + LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + } + if (HasAtLeastOneReduction) { + // Emit nowait reduction if nowait clause is present or directive is a + // parallel directive (it always has implicit barrier). + CGM.getOpenMPRuntime().emitReduction( + *this, D.getLocEnd(), LHSExprs, RHSExprs, ReductionOps, + D.getSingleClause(OMPC_nowait) || + isOpenMPParallelDirective(D.getDirectiveKind())); + } +} + /// \brief Emits code for OpenMP parallel directive in the parallel region. static void emitOMPParallelCall(CodeGenFunction &CGF, const OMPExecutableDirective &S, @@ -253,11 +320,13 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { OMPPrivateScope PrivateScope(CGF); CGF.EmitOMPPrivateClause(S, PrivateScope); CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPReductionClauseInit(S, PrivateScope); if (PrivateScope.Privatize()) // Emit implicit barrier to synchronize threads and avoid data races. 
CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), OMPD_unknown); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + CGF.EmitOMPReductionClauseFinal(S); // Emit implicit barrier at the end of the 'parallel' directive. CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getLocStart(), OMPD_unknown); @@ -1155,73 +1224,113 @@ static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); } -static Optional<llvm::AtomicRMWInst::BinOp> -getCompatibleAtomicRMWBinOp(ASTContext &Context, BinaryOperatorKind Op, - bool IsXLHSInRHSPart, LValue XLValue, - RValue ExprRValue) { - Optional<llvm::AtomicRMWInst::BinOp> RMWOp; - // Allow atomicrmw only if 'x' and 'expr' are integer values, lvalue for 'x' +bool emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, RValue Update, + BinaryOperatorKind BO, llvm::AtomicOrdering AO, + bool IsXLHSInRHSPart) { + auto &Context = CGF.CGM.getContext(); + // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' // expression is simple and atomic is allowed for the given type for the // target platform. 
- if (ExprRValue.isScalar() && - ExprRValue.getScalarVal()->getType()->isIntegerTy() && - XLValue.isSimple() && - (isa<llvm::ConstantInt>(ExprRValue.getScalarVal()) || - (ExprRValue.getScalarVal()->getType() == - XLValue.getAddress()->getType()->getPointerElementType())) && - Context.getTargetInfo().hasBuiltinAtomic( - Context.getTypeSize(XLValue.getType()), - Context.toBits(XLValue.getAlignment()))) { - switch (Op) { - case BO_Add: - RMWOp = llvm::AtomicRMWInst::Add; - break; - case BO_Sub: - if (IsXLHSInRHSPart) { - RMWOp = llvm::AtomicRMWInst::Sub; - } - break; - case BO_And: - RMWOp = llvm::AtomicRMWInst::And; - break; - case BO_Or: - RMWOp = llvm::AtomicRMWInst::Or; - break; - case BO_Xor: - RMWOp = llvm::AtomicRMWInst::Xor; - break; - case BO_Mul: - case BO_Div: - case BO_Rem: - case BO_Shl: - case BO_Shr: - break; - case BO_PtrMemD: - case BO_PtrMemI: - case BO_LT: - case BO_GT: - case BO_LE: - case BO_GE: - case BO_EQ: - case BO_NE: - case BO_LAnd: - case BO_LOr: - case BO_Assign: - case BO_MulAssign: - case BO_DivAssign: - case BO_RemAssign: - case BO_AddAssign: - case BO_SubAssign: - case BO_ShlAssign: - case BO_ShrAssign: - case BO_AndAssign: - case BO_XorAssign: - case BO_OrAssign: - case BO_Comma: - llvm_unreachable("Unexpected binary operation in 'atomic update'."); + if (BO == BO_Comma || !Update.isScalar() || + !Update.getScalarVal()->getType()->isIntegerTy() || !X.isSimple() || + (!isa<llvm::ConstantInt>(Update.getScalarVal()) && + (Update.getScalarVal()->getType() != + X.getAddress()->getType()->getPointerElementType())) || + !Context.getTargetInfo().hasBuiltinAtomic( + Context.getTypeSize(X.getType()), Context.toBits(X.getAlignment()))) + return false; + + llvm::AtomicRMWInst::BinOp RMWOp; + switch (BO) { + case BO_Add: + RMWOp = llvm::AtomicRMWInst::Add; + break; + case BO_Sub: + if (!IsXLHSInRHSPart) + return false; + RMWOp = llvm::AtomicRMWInst::Sub; + break; + case BO_And: + RMWOp = llvm::AtomicRMWInst::And; + break; + case BO_Or: + RMWOp = 
llvm::AtomicRMWInst::Or; + break; + case BO_Xor: + RMWOp = llvm::AtomicRMWInst::Xor; + break; + case BO_LT: + RMWOp = X.getType()->hasSignedIntegerRepresentation() + ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min + : llvm::AtomicRMWInst::Max) + : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin + : llvm::AtomicRMWInst::UMax); + break; + case BO_GT: + RMWOp = X.getType()->hasSignedIntegerRepresentation() + ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max + : llvm::AtomicRMWInst::Min) + : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax + : llvm::AtomicRMWInst::UMin); + break; + case BO_Mul: + case BO_Div: + case BO_Rem: + case BO_Shl: + case BO_Shr: + case BO_LAnd: + case BO_LOr: + return false; + case BO_PtrMemD: + case BO_PtrMemI: + case BO_LE: + case BO_GE: + case BO_EQ: + case BO_NE: + case BO_Assign: + case BO_AddAssign: + case BO_SubAssign: + case BO_AndAssign: + case BO_OrAssign: + case BO_XorAssign: + case BO_MulAssign: + case BO_DivAssign: + case BO_RemAssign: + case BO_ShlAssign: + case BO_ShrAssign: + case BO_Comma: + llvm_unreachable("Unsupported atomic update operation"); + } + auto *UpdateVal = Update.getScalarVal(); + if (auto *IC = dyn_cast<llvm::ConstantInt>(UpdateVal)) { + UpdateVal = CGF.Builder.CreateIntCast( + IC, X.getAddress()->getType()->getPointerElementType(), + X.getType()->hasSignedIntegerRepresentation()); + } + CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO); + return true; +} + +void CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( + LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, + llvm::AtomicOrdering AO, SourceLocation Loc, + const llvm::function_ref<RValue(RValue)> &CommonGen) { + // Update expressions are allowed to have the following forms: + // x binop= expr; -> xrval + expr; + // x++, ++x -> xrval + 1; + // x--, --x -> xrval - 1; + // x = x binop expr; -> xrval binop expr + // x = expr Op x; - > expr binop xrval; + if (!emitOMPAtomicRMW(*this, X, E, BO, AO, IsXLHSInRHSPart)) { + if (X.isGlobalReg()) { + // 
Emit an update expression: 'xrval' binop 'expr' or 'expr' binop + // 'xrval'. + EmitStoreThroughLValue(CommonGen(EmitLoadOfLValue(X, Loc)), X); + } else { + // Perform compare-and-swap procedure. + EmitAtomicUpdate(X, AO, CommonGen, X.getType().isVolatileQualified()); } } - return std::move(RMWOp); } static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, @@ -1237,42 +1346,22 @@ static void EmitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, // x--, --x -> xrval - 1; // x = x binop expr; -> xrval binop expr // x = expr Op x; - > expr binop xrval; - assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); + assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - const auto &Op = - getCompatibleAtomicRMWBinOp(CGF.CGM.getContext(), BOUE->getOpcode(), - IsXLHSInRHSPart, XLValue, ExprRValue); auto AO = IsSeqCst ? llvm::SequentiallyConsistent : llvm::Monotonic; - if (Op) { - auto *ExprVal = ExprRValue.getScalarVal(); - if (auto *IC = dyn_cast<llvm::ConstantInt>(ExprVal)) { - ExprVal = CGF.Builder.CreateIntCast( - IC, XLValue.getAddress()->getType()->getPointerElementType(), - XLValue.getType()->hasSignedIntegerRepresentation()); - } - CGF.Builder.CreateAtomicRMW(*Op, XLValue.getAddress(), ExprVal, AO); - } else { - auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); - auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); - CodeGenFunction::OpaqueValueMapping MapExpr( - CGF, IsXLHSInRHSPart ? RHS : LHS, ExprRValue); - auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; - if (XLValue.isGlobalReg()) { - // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop - // 'xrval'. - CodeGenFunction::OpaqueValueMapping MapX( - CGF, XRValExpr, CGF.EmitLoadOfLValue(XLValue, Loc)); - CGF.EmitStoreThroughLValue(CGF.EmitAnyExpr(UE), XLValue); - } else { - // Perform compare-and-swap procedure. 
- CGF.EmitAtomicUpdate( - XLValue, AO, [&CGF, &UE, &XRValExpr](RValue XRVal) -> RValue { - CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRVal); - return CGF.EmitAnyExpr(UE); - }, /*IsVolatile=*/false); - } - } + auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); + auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); + auto *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; + auto *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; + auto Gen = + [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) -> RValue { + CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); + CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); + return CGF.EmitAnyExpr(UE); + }; + CGF.EmitOMPAtomicSimpleUpdateExpr(XLValue, ExprRValue, BOUE->getOpcode(), + IsXLHSInRHSPart, AO, Loc, Gen); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 0ab1e5ea8b1..7cf1521fcd0 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2032,10 +2032,39 @@ public: void EmitOMPAggregateAssign(LValue OriginalAddr, llvm::Value *PrivateAddr, const Expr *AssignExpr, QualType Type, const VarDecl *VDInit); + /// \brief Emit atomic update code for constructs: \a X = \a X \a BO \a E or + /// \a X = \a E \a BO \a E. + /// + /// \param X Value to be updated. + /// \param E Update value. + /// \param BO Binary operation for update operation. + /// \param IsXLHSInRHSPart true if \a X is LHS in RHS part of the update + /// expression, false otherwise. + /// \param AO Atomic ordering of the generated atomic instructions. + /// \param CommonGen Code generator for complex expressions that cannot be + /// expressed through atomicrmw instruction. 
+ void EmitOMPAtomicSimpleUpdateExpr( + LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, + llvm::AtomicOrdering AO, SourceLocation Loc, + const llvm::function_ref<RValue(RValue)> &CommonGen); void EmitOMPFirstprivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); void EmitOMPPrivateClause(const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope); + /// \brief Emit initial code for reduction variables. Creates reduction copies + /// and initializes them with the values according to OpenMP standard. + /// + /// \param D Directive (possibly) with the 'reduction' clause. + /// \param PrivateScope Private scope for capturing reduction variables for + /// proper codegen in internal captured statement. + /// + void EmitOMPReductionClauseInit(const OMPExecutableDirective &D, + OMPPrivateScope &PrivateScope); + /// \brief Emit final update of reduction values to original variables at + /// the end of the directive. + /// + /// \param D Directive that has at least one 'reduction' directives. + void EmitOMPReductionClauseFinal(const OMPExecutableDirective &D); void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); |