summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen
diff options
context:
space:
mode:
authorAlexander Musman <alexander.musman@gmail.com>2014-12-15 07:07:06 +0000
committerAlexander Musman <alexander.musman@gmail.com>2014-12-15 07:07:06 +0000
commitc638868bdf239402120b2dc83c6ea0fc3bd0fac8 (patch)
tree0ef5c973d8773109715a9ab1ee6d442716c61010 /clang/lib/CodeGen
parentecabbc52d51a737ae9964190933c7a0abcff3b21 (diff)
downloadbcm5719-llvm-c638868bdf239402120b2dc83c6ea0fc3bd0fac8.tar.gz
bcm5719-llvm-c638868bdf239402120b2dc83c6ea0fc3bd0fac8.zip
First patch with codegen of the 'omp for' directive. It implements
the simplest case, which is used when no chunk_size is specified in the schedule(static) or no 'schedule' clause is specified - the iteration space is divided by the library into chunks that are approximately equal in size, and at most one chunk is distributed to each thread. In this case, we do not need an outer loop in each thread - each thread requests once which iterations range it should handle (using __kmpc_for_static_init runtime call) and then runs the inner loop on this range. Differential Revision: http://reviews.llvm.org/D5865 llvm-svn: 224233
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp188
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h54
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp106
-rw-r--r--clang/lib/CodeGen/CodeGenFunction.h7
4 files changed, 351 insertions, 4 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 206a86fe65d..ba4a16bbf94 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -147,7 +147,9 @@ llvm::Value *CGOpenMPRuntime::EmitOpenMPUpdateLocation(
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
if (I != OpenMPLocThreadIDMap.end())
LocValue = I->second.DebugLoc;
- else {
+ // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
+ // GetOpenMPThreadID was called before this routine.
+ if (LocValue == nullptr) {
// Generate "ident_t .kmpc_loc.addr;"
llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
@@ -332,6 +334,95 @@ CGOpenMPRuntime::CreateRuntimeFunction(OpenMPRTLFunction Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
break;
}
+ // Build __kmpc_for_static_init*(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ case OMPRTL__kmpc_for_static_init_4: {
+ auto ITy = CGM.Int32Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_init_4u: {
+ auto ITy = CGM.Int32Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_4u");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_init_8: {
+ auto ITy = CGM.Int64Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_init_8u: {
+ auto ITy = CGM.Int64Ty;
+ auto PtrTy = llvm::PointerType::getUnqual(ITy);
+ llvm::Type *TypeParams[] = {
+ getIdentTyPointerTy(), // loc
+ CGM.Int32Ty, // tid
+ CGM.Int32Ty, // schedtype
+ llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
+ PtrTy, // p_lower
+ PtrTy, // p_upper
+ PtrTy, // p_stride
+ ITy, // incr
+ ITy // chunk
+ };
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_init_8u");
+ break;
+ }
+ case OMPRTL__kmpc_for_static_fini: {
+ // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
+ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
+ break;
+ }
case OMPRTL__kmpc_push_num_threads: {
// Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
// kmp_int32 num_threads)
@@ -713,6 +804,101 @@ void CGOpenMPRuntime::EmitOMPBarrierCall(CodeGenFunction &CGF,
CGF.EmitRuntimeCall(RTLFn, Args);
}
+/// \brief Schedule types for 'omp for' loops (these enumerators are taken from
+/// the enum sched_type in kmp.h).
+enum OpenMPSchedType {
+ /// \brief Lower bound for default (unordered) versions.
+ OMP_sch_lower = 32,
+ OMP_sch_static_chunked = 33,
+ OMP_sch_static = 34,
+ OMP_sch_dynamic_chunked = 35,
+ OMP_sch_guided_chunked = 36,
+ OMP_sch_runtime = 37,
+ OMP_sch_auto = 38,
+ /// \brief Lower bound for 'ordered' versions.
+ OMP_ord_lower = 64,
+ /// \brief Lower bound for 'nomerge' versions.
+ OMP_nm_lower = 160,
+};
+
+/// \brief Map the OpenMP loop schedule to the runtime enumeration.
+static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) {
+ switch (ScheduleKind) {
+ case OMPC_SCHEDULE_static:
+ return Chunked ? OMP_sch_static_chunked : OMP_sch_static;
+ case OMPC_SCHEDULE_dynamic:
+ return OMP_sch_dynamic_chunked;
+ case OMPC_SCHEDULE_guided:
+ return OMP_sch_guided_chunked;
+ case OMPC_SCHEDULE_auto:
+ return OMP_sch_auto;
+ case OMPC_SCHEDULE_runtime:
+ return OMP_sch_runtime;
+ case OMPC_SCHEDULE_unknown:
+ assert(!Chunked && "chunk was specified but schedule kind not known");
+ return OMP_sch_static;
+ }
+ llvm_unreachable("Unexpected runtime schedule");
+}
+
+bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const {
+ auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
+ return Schedule == OMP_sch_static;
+}
+
+void CGOpenMPRuntime::EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind,
+ unsigned IVSize, bool IVSigned,
+ llvm::Value *IL, llvm::Value *LB,
+ llvm::Value *UB, llvm::Value *ST,
+ llvm::Value *Chunk) {
+ OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunk != nullptr);
+ // Call __kmpc_for_static_init(
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
+ // TODO: Implement dynamic schedule.
+
+ // If the Chunk was not specified in the clause - use default value 1.
+ if (Chunk == nullptr)
+ Chunk = CGF.Builder.getIntN(IVSize, /*C*/ 1);
+
+ llvm::Value *Args[] = {
+ EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+ GetOpenMPThreadID(CGF, Loc),
+ CGF.Builder.getInt32(Schedule), // Schedule type
+ IL, // &isLastIter
+ LB, // &LB
+ UB, // &UB
+ ST, // &Stride
+ CGF.Builder.getIntN(IVSize, 1), // Incr
+ Chunk // Chunk
+ };
+ assert((IVSize == 32 || IVSize == 64) &&
+ "Index size is not compatible with the omp runtime");
+ auto F = IVSize == 32 ? (IVSigned ? OMPRTL__kmpc_for_static_init_4
+ : OMPRTL__kmpc_for_static_init_4u)
+ : (IVSigned ? OMPRTL__kmpc_for_static_init_8
+ : OMPRTL__kmpc_for_static_init_8u);
+ auto RTLFn = CreateRuntimeFunction(F);
+ CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
+void CGOpenMPRuntime::EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind) {
+ assert((ScheduleKind == OMPC_SCHEDULE_static ||
+ ScheduleKind == OMPC_SCHEDULE_unknown) &&
+ "Non-static schedule kinds are not yet implemented");
+ // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
+ llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
+ GetOpenMPThreadID(CGF, Loc)};
+ auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_for_static_fini);
+ CGF.EmitRuntimeCall(RTLFn, Args);
+}
+
void CGOpenMPRuntime::EmitOMPNumThreadsClause(CodeGenFunction &CGF,
llvm::Value *NumThreads,
SourceLocation Loc) {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 45cadd2c94f..cfc5eaaaf21 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -15,6 +15,7 @@
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/OpenMPKinds.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringMap.h"
@@ -66,6 +67,12 @@ private:
// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_cancel_barrier,
+ // Calls for static scheduling 'omp for' loops.
+ OMPRTL__kmpc_for_static_init_4,
+ OMPRTL__kmpc_for_static_init_4u,
+ OMPRTL__kmpc_for_static_init_8,
+ OMPRTL__kmpc_for_static_init_8u,
+ OMPRTL__kmpc_for_static_fini,
// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
// global_tid);
OMPRTL__kmpc_serialized_parallel,
@@ -305,6 +312,53 @@ public:
virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
bool IsExplicit = true);
+ /// \brief Check if the specified \a ScheduleKind is static non-chunked.
+ /// This kind of worksharing directive is emitted without outer loop.
+ /// \param ScheduleKind Schedule kind specified in the 'schedule' clause.
+ /// \param Chunked True if chunk is specified in the clause.
+ ///
+ virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
+ bool Chunked) const;
+
+ /// \brief Call the appropriate runtime routine to initialize it before start
+ /// of loop.
+ ///
+ /// Depending on the loop schedule, it is nesessary to call some runtime
+ /// routine before start of the OpenMP loop to get the loop upper / lower
+ /// bounds \a LB and \a UB and stride \a ST.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ /// \param IVSize Size of the iteration variable in bits.
+ /// \param IVSigned Sign of the interation variable.
+ /// \param IL Address of the output variable in which the flag of the
+ /// last iteration is returned.
+ /// \param LB Address of the output variable in which the lower iteration
+ /// number is returned.
+ /// \param UB Address of the output variable in which the upper iteration
+ /// number is returned.
+ /// \param ST Address of the output variable in which the stride value is
+ /// returned nesessary to generated the static_chunked scheduled loop.
+ /// \param Chunk Value of the chunk for the static_chunked scheduled loop.
+ /// For the default (nullptr) value, the chunk 1 will be used.
+ ///
+ virtual void EmitOMPForInit(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind SchedKind,
+ unsigned IVSize, bool IVSigned, llvm::Value *IL,
+ llvm::Value *LB, llvm::Value *UB, llvm::Value *ST,
+ llvm::Value *Chunk = nullptr);
+
+ /// \brief Call the appropriate runtime routine to notify that we finished
+ /// all the work with current loop.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ ///
+ virtual void EmitOMPForFinish(CodeGenFunction &CGF, SourceLocation Loc,
+ OpenMPScheduleClauseKind ScheduleKind);
+
/// \brief Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32
/// global_tid, kmp_int32 num_threads) to generate code for 'num_threads'
/// clause.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index cb00c69a66f..fd4310cc634 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -470,8 +470,110 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
-void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &) {
- llvm_unreachable("CodeGen for 'omp for' is not supported yet.");
+/// \brief Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
+void CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+ // Emit the loop iteration variable.
+ auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
+ auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
+ EmitVarDecl(*IVDecl);
+
+ // Emit the iterations count variable.
+ // If it is not a variable, Sema decided to calculate iterations count on each
+ // iteration (e.g., it is foldable into a constant).
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
+ EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
+ // Emit calculation of the iterations count.
+ EmitIgnoredExpr(S.getCalcLastIteration());
+ }
+
+ auto &RT = CGM.getOpenMPRuntime();
+
+ // Check pre-condition.
+ {
+ // Skip the entire loop if we don't meet the precondition.
+ RegionCounter Cnt = getPGORegionCounter(&S);
+ auto ThenBlock = createBasicBlock("omp.precond.then");
+ auto ContBlock = createBasicBlock("omp.precond.end");
+ EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
+ EmitBlock(ThenBlock);
+ Cnt.beginRegion(Builder);
+ // Emit 'then' code.
+ {
+ // Emit helper vars inits.
+ LValue LB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+ LValue ST =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
+ LValue IL =
+ EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getIsLastIterVariable()));
+
+ OMPPrivateScope LoopScope(*this);
+ EmitPrivateLoopCounters(*this, LoopScope, S.counters());
+
+ // Detect the loop schedule kind and chunk.
+ auto ScheduleKind = OMPC_SCHEDULE_unknown;
+ llvm::Value *Chunk = nullptr;
+ if (auto C = cast_or_null<OMPScheduleClause>(
+ S.getSingleClause(OMPC_schedule))) {
+ ScheduleKind = C->getScheduleKind();
+ if (auto Ch = C->getChunkSize()) {
+ Chunk = EmitScalarExpr(Ch);
+ Chunk = EmitScalarConversion(Chunk, Ch->getType(),
+ S.getIterationVariable()->getType());
+ }
+ }
+ const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
+ const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
+ if (RT.isStaticNonchunked(ScheduleKind,
+ /* Chunked */ Chunk != nullptr)) {
+ // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
+ // When no chunk_size is specified, the iteration space is divided into
+ // chunks that are approximately equal in size, and at most one chunk is
+ // distributed to each thread. Note that the size of the chunks is
+ // unspecified in this case.
+ RT.EmitOMPForInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
+ IL.getAddress(), LB.getAddress(), UB.getAddress(),
+ ST.getAddress());
+ // UB = min(UB, GlobalUB);
+ EmitIgnoredExpr(S.getEnsureUpperBound());
+ // IV = LB;
+ EmitIgnoredExpr(S.getInit());
+ // while (idx <= UB) { BODY; ++idx; }
+ EmitOMPInnerLoop(S, LoopScope);
+ // Tell the runtime we are done.
+ RT.EmitOMPForFinish(*this, S.getLocStart(), ScheduleKind);
+ } else
+ ErrorUnsupported(&S, "OpenMP loop with requested schedule");
+ }
+ // We're now done with the loop, so jump to the continuation block.
+ EmitBranch(ContBlock);
+ EmitBlock(ContBlock, true);
+ }
+}
+
+void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
+ RunCleanupsScope DirectiveScope(*this);
+
+ CGDebugInfo *DI = getDebugInfo();
+ if (DI)
+ DI->EmitLexicalBlockStart(Builder, S.getSourceRange().getBegin());
+
+ EmitOMPWorksharingLoop(S);
+
+ // Emit an implicit barrier at the end.
+ CGM.getOpenMPRuntime().EmitOMPBarrierCall(*this, S.getLocStart(),
+ /*IsExplicit*/ false);
+ if (DI)
+ DI->EmitLexicalBlockEnd(Builder, S.getSourceRange().getEnd());
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &) {
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 6c61df78bdf..8a8827be10d 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2043,12 +2043,17 @@ public:
void EmitOMPTargetDirective(const OMPTargetDirective &S);
void EmitOMPTeamsDirective(const OMPTeamsDirective &S);
- /// Helpers for 'omp simd' directive.
+private:
+
+ /// Helpers for the OpenMP loop directives.
void EmitOMPLoopBody(const OMPLoopDirective &Directive,
bool SeparateIter = false);
void EmitOMPInnerLoop(const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
bool SeparateIter = false);
void EmitOMPSimdFinal(const OMPLoopDirective &S);
+ void EmitOMPWorksharingLoop(const OMPLoopDirective &S);
+
+public:
//===--------------------------------------------------------------------===//
// LValue Expression Emission
OpenPOWER on IntegriCloud