diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2016-05-25 12:36:08 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2016-05-25 12:36:08 +0000 |
commit | 8b42706a6eaf5dda91e8b2fe81c9bce39893cca4 (patch) | |
tree | b373f9979a1c48474915ef007e61bea2e71c460b /clang/lib/CodeGen | |
parent | 70381ed3e7abb9978ba4e44e6ed7206349e68ed3 (diff) | |
download | bcm5719-llvm-8b42706a6eaf5dda91e8b2fe81c9bce39893cca4.tar.gz bcm5719-llvm-8b42706a6eaf5dda91e8b2fe81c9bce39893cca4.zip |
[OPENMP 4.5] Codegen for doacross loop synchronization constructs.
OpenMP 4.5 adds support for doacross loop synchronization. This patch
implements codegen for this construct.
llvm-svn: 270690
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 159 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 17 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 14 |
3 files changed, 188 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 87e2ed61af2..4b0d21373ed 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -627,6 +627,17 @@ enum OpenMPRTLFunction { // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int // sched, kmp_uint64 grainsize, void *task_dup); OMPRTL__kmpc_taskloop, + // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + OMPRTL__kmpc_doacross_init, + // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + OMPRTL__kmpc_doacross_fini, + // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_post, + // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + OMPRTL__kmpc_doacross_wait, // // Offloading related calls @@ -1476,6 +1487,46 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); break; } + case OMPRTL__kmpc_doacross_init: { + // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 + // num_dims, struct kmp_dim *dims); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.Int32Ty, + CGM.Int32Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init"); + break; + } + case OMPRTL__kmpc_doacross_fini: { + // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini"); + break; + } + case OMPRTL__kmpc_doacross_post: { + // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, 
kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post"); + break; + } + case OMPRTL__kmpc_doacross_wait: { + // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 + // *vec); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.Int64Ty->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -6316,3 +6367,111 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State); } } + +namespace { +/// Cleanup action for doacross support. 
+class DoacrossCleanupTy final : public EHScopeStack::Cleanup { +public: + static const int DoacrossFinArgs = 2; + +private: + llvm::Value *RTLFn; + llvm::Value *Args[DoacrossFinArgs]; + +public: + DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs) + : RTLFn(RTLFn) { + assert(CallArgs.size() == DoacrossFinArgs); + std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args)); + } + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + CGF.EmitRuntimeCall(RTLFn, Args); + } +}; +} // namespace + +void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D) { + if (!CGF.HaveInsertPoint()) + return; + + ASTContext &C = CGM.getContext(); + QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true); + RecordDecl *RD; + if (KmpDimTy.isNull()) { + // Build struct kmp_dim { // loop bounds info casted to kmp_int64 + // kmp_int64 lo; // lower + // kmp_int64 up; // upper + // kmp_int64 st; // stride + // }; + RD = C.buildImplicitRecord("kmp_dim"); + RD->startDefinition(); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + addFieldToRecordDecl(C, RD, Int64Ty); + RD->completeDefinition(); + KmpDimTy = C.getRecordType(RD); + } else + RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); + + Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims"); + CGF.EmitNullInitialization(DimsAddr, KmpDimTy); + enum { LowerFD = 0, UpperFD, StrideFD }; + // Fill dims with data. 
+ LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy); + // dims.upper = num_iterations; + LValue UpperLVal = + CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD)); + llvm::Value *NumIterVal = CGF.EmitScalarConversion( + CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(), + Int64Ty, D.getNumIterations()->getExprLoc()); + CGF.EmitStoreOfScalar(NumIterVal, UpperLVal); + // dims.stride = 1; + LValue StrideLVal = + CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD)); + CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1), + StrideLVal); + + // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, + // kmp_int32 num_dims, struct kmp_dim * dims); + llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()), + getThreadID(CGF, D.getLocStart()), + llvm::ConstantInt::getSigned(CGM.Int32Ty, 1), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + DimsAddr.getPointer(), CGM.VoidPtrTy)}; + + llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init); + CGF.EmitRuntimeCall(RTLFn, Args); + llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = { + emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())}; + llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini); + CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, + llvm::makeArrayRef(FiniArgs)); +} + +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + QualType Int64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + const Expr *CounterVal = C->getCounterValue(); + assert(CounterVal); + llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal), + CounterVal->getType(), Int64Ty, + CounterVal->getExprLoc()); + Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr"); + CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty); + llvm::Value 
*Args[] = {emitUpdateLocation(CGF, C->getLocStart()), + getThreadID(CGF, C->getLocStart()), + CntAddr.getPointer()}; + llvm::Value *RTLFn; + if (C->getDependencyKind() == OMPC_DEPEND_source) + RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post); + else { + assert(C->getDependencyKind() == OMPC_DEPEND_sink); + RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait); + } + CGF.EmitRuntimeCall(RTLFn, Args); +} + diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 63616f13f80..973e1f96b44 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -37,6 +37,7 @@ class Value; namespace clang { class Expr; class GlobalDecl; +class OMPDependClause; class OMPExecutableDirective; class OMPLoopDirective; class VarDecl; @@ -201,6 +202,12 @@ private: /// } flags; /// } kmp_depend_info_t; QualType KmpDependInfoTy; + /// struct kmp_dim { // loop bounds info casted to kmp_int64 + /// kmp_int64 lo; // lower + /// kmp_int64 up; // upper + /// kmp_int64 st; // stride + /// }; + QualType KmpDimTy; /// \brief Type struct __tgt_offload_entry{ /// void *addr; // Pointer to the offload entry info. /// // (function or global) @@ -1020,6 +1027,16 @@ public: /// attributes. virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn); + + /// Emit initialization for doacross loop nesting support. + /// \param D Loop-based construct used in doacross nesting construct. + virtual void emitDoacrossInit(CodeGenFunction &CGF, + const OMPLoopDirective &D); + + /// Emit code for doacross ordered directive with 'depend' clause. + /// \param C 'depend' clause with 'sink|source' dependency kind. 
+ virtual void emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C); }; } // namespace CodeGen diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index cfe4cb714de..45d8d0af10d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1913,6 +1913,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { incrementProfileCounter(&S); } + bool Ordered = false; + if (auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { + if (OrderedClause->getNumForLoops()) + RT.emitDoacrossInit(*this, S); + else + Ordered = true; + } + llvm::DenseSet<const Expr *> EmittedFinals; emitAlignedClause(*this, S); EmitOMPLinearClauseInit(S); @@ -1960,7 +1968,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { } const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); - const bool Ordered = S.getSingleClause<OMPOrderedClause>() != nullptr; // OpenMP 4.5, 2.7.1 Loop Construct, Description. // If the static schedule kind is specified or if the ordered clause is // specified, and if no monotonic modifier is specified, the effect will @@ -2685,8 +2692,11 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, } void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { - if (!S.getAssociatedStmt()) + if (!S.getAssociatedStmt()) { + for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) + CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); return; + } auto *C = S.getSingleClause<OMPSIMDClause>(); auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, PrePostActionTy &Action) { |