summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntime.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp151
1 files changed, 125 insertions, 26 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 7d12c3620da..828be92d542 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -72,6 +72,8 @@ public:
/// \return LValue for thread id variable. This LValue always has type int32*.
virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
+ virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
+
CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
@@ -82,6 +84,8 @@ public:
return Info->getKind() == CR_OpenMP;
}
+ ~CGOpenMPRegionInfo() override = default;
+
protected:
CGOpenMPRegionKind RegionKind;
RegionCodeGenTy CodeGen;
@@ -90,7 +94,7 @@ protected:
};
/// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
@@ -121,14 +125,62 @@ private:
};
/// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
+ class UntiedTaskActionTy final : public PrePostActionTy {
+ bool Untied;
+ const VarDecl *PartIDVar;
+ const RegionCodeGenTy &UntiedCodeGen;
+ llvm::SwitchInst *UntiedSwitch = nullptr;
+
+ public:
+ UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
+ const RegionCodeGenTy &UntiedCodeGen)
+ : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
+ void Enter(CodeGenFunction &CGF) override {
+ if (Untied) {
+ // Emit task switching point.
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(PartIDVar),
+ PartIDVar->getType()->castAs<PointerType>());
+ auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
+ auto *DoneBB = CGF.createBasicBlock(".untied.done.");
+ UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
+ CGF.EmitBlock(DoneBB);
+ CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+ CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
+ UntiedSwitch->addCase(CGF.Builder.getInt32(0),
+ CGF.Builder.GetInsertBlock());
+ emitUntiedSwitch(CGF);
+ }
+ }
+ void emitUntiedSwitch(CodeGenFunction &CGF) const {
+ if (Untied) {
+ auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
+ CGF.GetAddrOfLocalVar(PartIDVar),
+ PartIDVar->getType()->castAs<PointerType>());
+ CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
+ PartIdLVal);
+ UntiedCodeGen(CGF);
+ CodeGenFunction::JumpDest CurPoint =
+ CGF.getJumpDestInCurrentScope(".untied.next.");
+ CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+ CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
+ UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
+ CGF.Builder.GetInsertBlock());
+ CGF.EmitBranchThroughCleanup(CurPoint);
+ CGF.EmitBlock(CurPoint.getBlock());
+ }
+ }
+ unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
+ };
CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
const VarDecl *ThreadIDVar,
const RegionCodeGenTy &CodeGen,
- OpenMPDirectiveKind Kind, bool HasCancel)
+ OpenMPDirectiveKind Kind, bool HasCancel,
+ const UntiedTaskActionTy &Action)
: CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
- ThreadIDVar(ThreadIDVar) {
+ ThreadIDVar(ThreadIDVar), Action(Action) {
assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
}
@@ -142,6 +194,10 @@ public:
/// \brief Get the name of the capture helper.
StringRef getHelperName() const override { return ".omp_outlined."; }
+ void emitUntiedSwitch(CodeGenFunction &CGF) override {
+ Action.emitUntiedSwitch(CGF);
+ }
+
static bool classof(const CGCapturedStmtInfo *Info) {
return CGOpenMPRegionInfo::classof(Info) &&
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
@@ -152,6 +208,8 @@ private:
/// \brief A variable or parameter storing global thread id for OpenMP
/// constructs.
const VarDecl *ThreadIDVar;
+ /// Action for emitting code for untied tasks.
+ const UntiedTaskActionTy &Action;
};
/// \brief API for inlined captured statement code generation in OpenMP
@@ -210,6 +268,11 @@ public:
llvm_unreachable("No helper name for inlined OpenMP construct");
}
+ void emitUntiedSwitch(CodeGenFunction &CGF) override {
+ if (OuterRegionInfo)
+ OuterRegionInfo->emitUntiedSwitch(CGF);
+ }
+
CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
static bool classof(const CGCapturedStmtInfo *Info) {
@@ -217,6 +280,8 @@ public:
cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
}
+ ~CGOpenMPInlinedRegionInfo() override = default;
+
private:
/// \brief CodeGen info about outer OpenMP region.
CodeGenFunction::CGCapturedStmtInfo *OldCSI;
@@ -228,7 +293,7 @@ private:
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
-class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo {
+class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
const RegionCodeGenTy &CodeGen, StringRef HelperName)
@@ -257,7 +322,7 @@ static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
}
/// \brief API for generation of expressions captured in a innermost OpenMP
/// region.
-class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo {
+class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
: CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
@@ -757,16 +822,36 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction(
llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
- OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ const VarDecl *PartIDVar, const VarDecl *TaskTVar,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ bool Tied, unsigned &NumberOfParts) {
+ auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ auto *ThreadID = getThreadID(CGF, D.getLocStart());
+ auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
+ llvm::Value *TaskArgs[] = {
+ UpLoc, ThreadID,
+ CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
+ TaskTVar->getType()->castAs<PointerType>())
+ .getPointer()};
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
+ };
+ CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
+ UntiedCodeGen);
+ CodeGen.setAction(Action);
assert(!ThreadIDVar->getType()->isPointerType() &&
"thread id variable must be of type kmp_int32 for tasks");
auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
CodeGenFunction CGF(CGM, true);
- CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
- InnermostKind,
- cast<OMPTaskDirective>(D).hasCancel());
+ CGOpenMPTaskOutlinedRegionInfo CGInfo(
+ *CS, ThreadIDVar, CodeGen, InnermostKind,
+ cast<OMPTaskDirective>(D).hasCancel(), Action);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
- return CGF.GenerateCapturedStmtFunction(*CS);
+ auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
+ CodeGen.clearAction();
+ if (!Tied)
+ NumberOfParts = Action.getNumberOfParts();
+ return Res;
}
Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
@@ -1898,6 +1983,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
+ if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
@@ -2951,7 +3038,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
-/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map,
+/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// tt->shareds);
/// return 0;
/// }
@@ -2982,7 +3069,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
- // tt->task_data.shareds);
+ // tt, tt->task_data.shareds);
auto *GtidParam = CGF.EmitLoadOfScalar(
CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
LValue TDBase = CGF.EmitLoadOfPointerLValue(
@@ -2995,7 +3082,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
- auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal();
+ auto *PartidParam = PartIdLVal.getPointer();
auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
@@ -3014,7 +3101,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
}
llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam,
- TaskPrivatesMap, SharedsParam};
+ TaskPrivatesMap,
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ TDBase.getAddress(), CGF.VoidPtrTy)
+ .getPointer(),
+ SharedsParam};
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
CGF.EmitStoreThroughLValue(
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
@@ -3154,8 +3245,8 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1,
void CGOpenMPRuntime::emitTaskCall(
CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
- llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
- const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
+ unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy,
+ Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
ArrayRef<const Expr *> PrivateCopies,
ArrayRef<const Expr *> FirstprivateVars,
ArrayRef<const Expr *> FirstprivateCopies,
@@ -3390,7 +3481,8 @@ void CGOpenMPRuntime::emitTaskCall(
KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
ArrayType::Normal, /*IndexTypeQuals=*/0);
// kmp_depend_info[<Dependences.size()>] deps;
- DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
+ DependenciesArray =
+ CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
for (unsigned i = 0; i < NumDependencies; ++i) {
const Expr *E = Dependences[i].second;
auto Addr = CGF.EmitLValue(E);
@@ -3448,8 +3540,6 @@ void CGOpenMPRuntime::emitTaskCall(
// NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc()
// libcall.
- // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
- // *new_task);
// Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
// kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
// kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
@@ -3467,18 +3557,25 @@ void CGOpenMPRuntime::emitTaskCall(
DepTaskArgs[5] = CGF.Builder.getInt32(0);
DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
}
- auto &&ThenCodeGen = [NumDependencies, &TaskArgs,
+ auto &&ThenCodeGen = [this, Tied, Loc, NumberOfParts, TDBase, KmpTaskTQTyRD,
+ NumDependencies, &TaskArgs,
&DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
- // TODO: add check for untied tasks.
- auto &RT = CGF.CGM.getOpenMPRuntime();
+ if (!Tied) {
+ auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
+ auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
+ CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
+ }
if (NumDependencies) {
CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
- DepTaskArgs);
+ createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
} else {
- CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task),
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
TaskArgs);
}
+ // Check if parent region is untied and build return for untied task;
+ if (auto *Region =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
};
llvm::Value *DepWaitTaskArgs[6];
@@ -4039,6 +4136,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
// Ignore return result until untied tasks are supported.
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
+ if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
OpenPOWER on IntegriCloud