Diffstat (limited to 'clang/lib')
-rw-r--r--   clang/lib/CodeGen/CGOpenMPRuntime.cpp | 146
-rw-r--r--   clang/lib/CodeGen/CGOpenMPRuntime.h   |  27
-rw-r--r--   clang/lib/CodeGen/CGStmtOpenMP.cpp    |  84
3 files changed, 213 insertions, 44 deletions
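For orientation before the diff: this patch teaches clang's OpenMP codegen to lower worksharing loops with non-static schedules through the libomp dispatch API instead of rejecting them with "OpenMP loop with dynamic schedule". Below is a minimal sketch of the kind of source that now takes the new path; the file name, array, and loop body are made up for illustration, and any loop with a dynamic, guided, auto, or runtime schedule clause would behave the same way.

/* dynamic_loop.c -- hypothetical example, not part of the patch.
 * Typically built with OpenMP enabled, e.g. clang -fopenmp dynamic_loop.c */
#include <stdio.h>

int main(void) {
  int a[128];
  /* With schedule(dynamic, 8), iterations are handed to threads in chunks
   * of 8 as they request them; this is exactly the case that previously hit
   * the ErrorUnsupported path removed in CGStmtOpenMP.cpp below. */
#pragma omp parallel for schedule(dynamic, 8)
  for (int i = 0; i < 128; ++i)
    a[i] = i * i;
  printf("%d\n", a[127]);
  return 0;
}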
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f77983a15f4..f7eb565635e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -661,6 +661,51 @@ CGOpenMPRuntime::createRuntimeFunction(OpenMPRTLFunction Function) {
   return RTLFn;
 }
 
+llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
+                                                            bool IVSigned) {
+  assert((IVSize == 32 || IVSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  auto Name =
+      IVSize == 32
+          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
+          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
+  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
+                               CGM.Int32Ty,           // tid
+                               CGM.Int32Ty,           // schedtype
+                               ITy,                   // lower
+                               ITy,                   // upper
+                               ITy,                   // stride
+                               ITy                    // chunk
+  };
+  llvm::FunctionType *FnTy =
+      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+  return CGM.CreateRuntimeFunction(FnTy, Name);
+}
+
+llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
+                                                            bool IVSigned) {
+  assert((IVSize == 32 || IVSize == 64) &&
+         "IV size is not compatible with the omp runtime");
+  auto Name =
+      IVSize == 32
+          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
+          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
+  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
+  auto PtrTy = llvm::PointerType::getUnqual(ITy);
+  llvm::Type *TypeParams[] = {
+      getIdentTyPointerTy(),                     // loc
+      CGM.Int32Ty,                               // tid
+      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+      PtrTy,                                     // p_lower
+      PtrTy,                                     // p_upper
+      PtrTy                                      // p_stride
+  };
+  llvm::FunctionType *FnTy =
+      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+  return CGM.CreateRuntimeFunction(FnTy, Name);
+}
+
 llvm::Constant *
 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
   // Lookup the entry, lazily creating it if necessary.
@@ -1076,34 +1121,56 @@ void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
                                   llvm::Value *UB, llvm::Value *ST,
                                   llvm::Value *Chunk) {
   OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
-  // Call __kmpc_for_static_init(
-  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
-  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
-  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
-  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
-  // TODO: Implement dynamic schedule.
-
-  // If the Chunk was not specified in the clause - use default value 1.
-  if (Chunk == nullptr)
-    Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
-
-  llvm::Value *Args[] = {
-      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
-      CGF.Builder.getInt32(Schedule), // Schedule type
-      IL,                             // &isLastIter
-      LB,                             // &LB
-      UB,                             // &UB
-      ST,                             // &Stride
-      CGF.Builder.getIntN(IVSize, 1), // Incr
-      Chunk                           // Chunk
-  };
-  assert((IVSize == 32 || IVSize == 64) &&
-         "Index size is not compatible with the omp runtime");
-  auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
-                                    : OMPRTL__kmpc_for_static_init_4u)
-                        : (IVSigned ? OMPRTL__kmpc_for_static_init_8
-                                    : OMPRTL__kmpc_for_static_init_8u);
-  CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
+  if (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked) {
+    // Call __kmpc_dispatch_init(
+    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
+    //          kmp_int[32|64] lower, kmp_int[32|64] upper,
+    //          kmp_int[32|64] stride, kmp_int[32|64] chunk);
+
+    // If the Chunk was not specified in the clause - use default value 1.
+    if (Chunk == nullptr)
+      Chunk = CGF.Builder.getIntN(IVSize, 1);
+    llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+                            getThreadID(CGF, Loc),
+                            CGF.Builder.getInt32(Schedule), // Schedule type
+                            CGF.Builder.getIntN(IVSize, 0), // Lower
+                            UB,                             // Upper
+                            CGF.Builder.getIntN(IVSize, 1), // Stride
+                            Chunk                           // Chunk
+    };
+    CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
+  } else {
+    // Call __kmpc_for_static_init(
+    //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+    //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+    //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+    //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
+    if (Chunk == nullptr) {
+      assert(Schedule == OMP_sch_static &&
+             "expected static non-chunked schedule");
+      // If the Chunk was not specified in the clause - use default value 1.
+      Chunk = CGF.Builder.getIntN(IVSize, 1);
+    } else
+      assert(Schedule == OMP_sch_static_chunked &&
+             "expected static chunked schedule");
+    llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+                            getThreadID(CGF, Loc),
+                            CGF.Builder.getInt32(Schedule), // Schedule type
+                            IL,                             // &isLastIter
+                            LB,                             // &LB
+                            UB,                             // &UB
+                            ST,                             // &Stride
+                            CGF.Builder.getIntN(IVSize, 1), // Incr
+                            Chunk                           // Chunk
+    };
+    assert((IVSize == 32 || IVSize == 64) &&
+           "Index size is not compatible with the omp runtime");
+    auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
+                                      : OMPRTL__kmpc_for_static_init_4u)
+                          : (IVSigned ? OMPRTL__kmpc_for_static_init_8
+                                      : OMPRTL__kmpc_for_static_init_8u);
+    CGF.EmitRuntimeCall(createRuntimeFunction(F), Args);
+  }
 }
 
 void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
@@ -1118,6 +1185,29 @@ void CGOpenMPRuntime::emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
                       Args);
 }
 
+llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
+                                          SourceLocation Loc, unsigned IVSize,
+                                          bool IVSigned, llvm::Value *IL,
+                                          llvm::Value *LB, llvm::Value *UB,
+                                          llvm::Value *ST) {
+  // Call __kmpc_dispatch_next(
+  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
+  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
+  //          kmp_int[32|64] *p_stride);
+  llvm::Value *Args[] = {
+      emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
+      IL, // &isLastIter
+      LB, // &Lower
+      UB, // &Upper
+      ST  // &Stride
+  };
+  llvm::Value *Call =
+      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
+  return CGF.EmitScalarConversion(
+      Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
+      CGF.getContext().BoolTy);
+}
+
 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                            llvm::Value *NumThreads,
                                            SourceLocation Loc) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index f4fb0325fe3..6fbc6c3fb20 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -222,6 +222,14 @@ class CGOpenMPRuntime {
   /// \return Specified function.
   llvm::Constant *createRuntimeFunction(OpenMPRTLFunction Function);
 
+  /// \brief Returns __kmpc_dispatch_init_* runtime function for the specified
+  /// size \a IVSize and sign \a IVSigned.
+  llvm::Constant *createDispatchInitFunction(unsigned IVSize, bool IVSigned);
+
+  /// \brief Returns __kmpc_dispatch_next_* runtime function for the specified
+  /// size \a IVSize and sign \a IVSigned.
+  llvm::Constant *createDispatchNextFunction(unsigned IVSize, bool IVSigned);
+
   /// \brief If the specified mangled name is not in the module, create and
   /// return threadprivate cache object. This object is a pointer's worth of
   /// storage that's reserved for use by the OpenMP runtime.
@@ -400,6 +408,25 @@ public:
   virtual void emitForFinish(CodeGenFunction &CGF, SourceLocation Loc,
                              OpenMPScheduleClauseKind ScheduleKind);
 
+  /// Call __kmpc_dispatch_next(
+  ///          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
+  ///          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
+  ///          kmp_int[32|64] *p_stride);
+  /// \param IVSize Size of the iteration variable in bits.
+  /// \param IVSigned Sign of the iteration variable.
+  /// \param IL Address of the output variable in which the flag of the
+  /// last iteration is returned.
+  /// \param LB Address of the output variable in which the lower iteration
+  /// number is returned.
+  /// \param UB Address of the output variable in which the upper iteration
+  /// number is returned.
+  /// \param ST Address of the output variable in which the stride value is
+  /// returned.
+  virtual llvm::Value *emitForNext(CodeGenFunction &CGF, SourceLocation Loc,
+                                   unsigned IVSize, bool IVSigned,
+                                   llvm::Value *IL, llvm::Value *LB,
+                                   llvm::Value *UB, llvm::Value *ST);
+
   /// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
   /// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
   /// clause.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 0a82adf9654..e3c4384a4bd 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -490,16 +490,50 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
                                           llvm::Value *ST, llvm::Value *IL,
                                           llvm::Value *Chunk) {
   auto &RT = CGM.getOpenMPRuntime();
+
+  // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
+  const bool Dynamic = RT.isDynamic(ScheduleKind);
+
   assert(!RT.isStaticNonchunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
          "static non-chunked schedule does not need outer loop");
-  if (RT.isDynamic(ScheduleKind)) {
-    ErrorUnsupported(&S, "OpenMP loop with dynamic schedule");
-    return;
-  }
 
   // Emit outer loop.
   //
   // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+  // When schedule(dynamic,chunk_size) is specified, the iterations are
+  // distributed to threads in the team in chunks as the threads request them.
+  // Each thread executes a chunk of iterations, then requests another chunk,
+  // until no chunks remain to be distributed. Each chunk contains chunk_size
+  // iterations, except for the last chunk to be distributed, which may have
+  // fewer iterations. When no chunk_size is specified, it defaults to 1.
+  //
+  // When schedule(guided,chunk_size) is specified, the iterations are assigned
+  // to threads in the team in chunks as the executing threads request them.
+  // Each thread executes a chunk of iterations, then requests another chunk,
+  // until no chunks remain to be assigned. For a chunk_size of 1, the size of
+  // each chunk is proportional to the number of unassigned iterations divided
+  // by the number of threads in the team, decreasing to 1. For a chunk_size
+  // with value k (greater than 1), the size of each chunk is determined in the
+  // same way, with the restriction that the chunks do not contain fewer than k
+  // iterations (except for the last chunk to be assigned, which may have fewer
+  // than k iterations).
+  //
+  // When schedule(auto) is specified, the decision regarding scheduling is
+  // delegated to the compiler and/or runtime system. The programmer gives the
+  // implementation the freedom to choose any possible mapping of iterations to
+  // threads in the team.
+  //
+  // When schedule(runtime) is specified, the decision regarding scheduling is
+  // deferred until run time, and the schedule and chunk size are taken from the
+  // run-sched-var ICV. If the ICV is set to auto, the schedule is
+  // implementation defined
+  //
+  // while(__kmpc_dispatch_next(&LB, &UB)) {
+  //   idx = LB;
+  //   while (idx <= UB) { BODY; ++idx; } // inner loop
+  // }
+  //
+  // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
   // When schedule(static, chunk_size) is specified, iterations are divided into
   // chunks of size chunk_size, and the chunks are assigned to the threads in
   // the team in a round-robin fashion in the order of the thread number.
@@ -510,12 +544,16 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   //   UB = UB + ST;
   // }
   //
+
   const Expr *IVExpr = S.getIterationVariable();
   const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
   const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
 
-  RT.emitForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
-                 UB, ST, Chunk);
+  RT.emitForInit(
+      *this, S.getLocStart(), ScheduleKind, IVSize, IVSigned, IL, LB,
+      (Dynamic ? EmitAnyExpr(S.getLastIteration()).getScalarVal() : UB), ST,
+      Chunk);
+
   auto LoopExit = getJumpDestInCurrentScope("omp.dispatch.end");
 
   // Start the loop with a block that tests the condition.
@@ -524,12 +562,17 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   LoopStack.push(CondBlock);
 
   llvm::Value *BoolCondVal = nullptr;
-  // UB = min(UB, GlobalUB)
-  EmitIgnoredExpr(S.getEnsureUpperBound());
-  // IV = LB
-  EmitIgnoredExpr(S.getInit());
-  // IV < UB
-  BoolCondVal = EvaluateExprAsBool(S.getCond(false));
+  if (!Dynamic) {
+    // UB = min(UB, GlobalUB)
+    EmitIgnoredExpr(S.getEnsureUpperBound());
+    // IV = LB
+    EmitIgnoredExpr(S.getInit());
+    // IV < UB
+    BoolCondVal = EvaluateExprAsBool(S.getCond(false));
+  } else {
+    BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned,
+                                 IL, LB, UB, ST);
+  }
 
   // If there are any cleanups between here and the loop-exit scope,
   // create a block to stage a loop exit along.
@@ -545,6 +588,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   }
   EmitBlock(LoopBody);
 
+  // Emit "IV = LB" (in case of static schedule, we have already calculated new
+  // LB for loop condition and emitted it above).
+  if (Dynamic)
+    EmitIgnoredExpr(S.getInit());
+
   // Create a block for the increment.
   auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
   BreakContinueStack.push_back(BreakContinue(LoopExit, Continue));
@@ -557,9 +605,11 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   EmitBlock(Continue.getBlock());
   BreakContinueStack.pop_back();
 
-  // Emit "LB = LB + Stride", "UB = UB + Stride".
-  EmitIgnoredExpr(S.getNextLowerBound());
-  EmitIgnoredExpr(S.getNextUpperBound());
+  if (!Dynamic) {
+    // Emit "LB = LB + Stride", "UB = UB + Stride".
+    EmitIgnoredExpr(S.getNextLowerBound());
+    EmitIgnoredExpr(S.getNextUpperBound());
+  }
 
   EmitBranch(CondBlock);
   LoopStack.pop();
@@ -567,7 +617,9 @@ void CodeGenFunction::EmitOMPForOuterLoop(OpenMPScheduleClauseKind ScheduleKind,
   EmitBlock(LoopExit.getBlock());
 
   // Tell the runtime we are done.
-  RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
+  // FIXME: Also call fini for ordered loops with dynamic scheduling.
+  if (!Dynamic)
+    RT.emitForFinish(*this, S.getLocStart(), ScheduleKind);
 }
 
 /// \brief Emit a helper variable and return corresponding lvalue.
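To make the new control flow easier to follow, here is a C-level sketch (not the emitted IR, and not code from the patch) of the outer loop that EmitOMPForOuterLoop now produces for a 32-bit signed induction variable with a dynamic schedule. The prototypes mirror the function types built by createDispatchInitFunction and createDispatchNextFunction above; the opaque ident_t, the schedtype value, and the helper names (sketch, body) are assumptions made purely for illustration.

/* outer_loop_sketch.c -- a minimal sketch under the assumptions stated above;
 * linking would require the libomp runtime that provides the __kmpc_* symbols. */
typedef int kmp_int32;
typedef struct ident ident_t; /* opaque stand-in for the runtime's ident_t */

extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 tid,
                                   kmp_int32 schedtype, kmp_int32 lower,
                                   kmp_int32 upper, kmp_int32 stride,
                                   kmp_int32 chunk);
extern kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
                                        kmp_int32 *p_lastiter,
                                        kmp_int32 *p_lower, kmp_int32 *p_upper,
                                        kmp_int32 *p_stride);

/* Shape of the outer loop emitted for schedule(dynamic/guided/auto/runtime). */
static void sketch(ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
                   kmp_int32 last_iteration, kmp_int32 chunk,
                   void (*body)(kmp_int32)) {
  kmp_int32 lb = 0, ub = last_iteration, st = 1, is_last = 0;

  /* emitForInit: lower = 0, upper = last iteration, stride = 1. */
  __kmpc_dispatch_init_4(loc, tid, schedtype, 0, ub, 1, chunk);

  /* emitForNext: each successful call fills lb/ub with the next chunk. */
  while (__kmpc_dispatch_next_4(loc, tid, &is_last, &lb, &ub, &st)) {
    for (kmp_int32 iv = lb; iv <= ub; ++iv) /* IV = LB; while (IV <= UB) */
      body(iv);
  }
  /* No emitForFinish call on this path; the FIXME in the diff leaves the
   * ordered/fini case for dynamic schedules to a later change. */
}

Static and static-chunked schedules keep the existing path: a single __kmpc_for_static_init_* call up front, bounds advanced with getNextLowerBound/getNextUpperBound on each outer iteration, and emitForFinish at loop exit.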