diff options
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 151 |
1 files changed, 125 insertions, 26 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 7d12c3620da..828be92d542 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -72,6 +72,8 @@ public: /// \return LValue for thread id variable. This LValue always has type int32*. virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); + virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} + CGOpenMPRegionKind getRegionKind() const { return RegionKind; } OpenMPDirectiveKind getDirectiveKind() const { return Kind; } @@ -82,6 +84,8 @@ public: return Info->getKind() == CR_OpenMP; } + ~CGOpenMPRegionInfo() override = default; + protected: CGOpenMPRegionKind RegionKind; RegionCodeGenTy CodeGen; @@ -90,7 +94,7 @@ protected: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, @@ -121,14 +125,62 @@ private: }; /// \brief API for captured statement code generation in OpenMP constructs. -class CGOpenMPTaskOutlinedRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { public: + class UntiedTaskActionTy final : public PrePostActionTy { + bool Untied; + const VarDecl *PartIDVar; + const RegionCodeGenTy &UntiedCodeGen; + llvm::SwitchInst *UntiedSwitch = nullptr; + + public: + UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, + const RegionCodeGenTy &UntiedCodeGen) + : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} + void Enter(CodeGenFunction &CGF) override { + if (Untied) { + // Emit task switching point. + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation()); + auto *DoneBB = CGF.createBasicBlock(".untied.done."); + UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); + CGF.EmitBlock(DoneBB); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(0), + CGF.Builder.GetInsertBlock()); + emitUntiedSwitch(CGF); + } + } + void emitUntiedSwitch(CodeGenFunction &CGF) const { + if (Untied) { + auto PartIdLVal = CGF.EmitLoadOfPointerLValue( + CGF.GetAddrOfLocalVar(PartIDVar), + PartIDVar->getType()->castAs<PointerType>()); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + PartIdLVal); + UntiedCodeGen(CGF); + CodeGenFunction::JumpDest CurPoint = + CGF.getJumpDestInCurrentScope(".untied.next."); + CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); + CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); + UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), + CGF.Builder.GetInsertBlock()); + CGF.EmitBranchThroughCleanup(CurPoint); + CGF.EmitBlock(CurPoint.getBlock()); + } + } + unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } + }; CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, const RegionCodeGenTy &CodeGen, - OpenMPDirectiveKind Kind, bool HasCancel) + OpenMPDirectiveKind Kind, bool HasCancel, + const UntiedTaskActionTy &Action) : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), - ThreadIDVar(ThreadIDVar) { + ThreadIDVar(ThreadIDVar), Action(Action) { assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); } @@ -142,6 +194,10 @@ public: /// \brief Get the name of the capture helper. StringRef getHelperName() const override { return ".omp_outlined."; } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + Action.emitUntiedSwitch(CGF); + } + static bool classof(const CGCapturedStmtInfo *Info) { return CGOpenMPRegionInfo::classof(Info) && cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == @@ -152,6 +208,8 @@ private: /// \brief A variable or parameter storing global thread id for OpenMP /// constructs. const VarDecl *ThreadIDVar; + /// Action for emitting code for untied tasks. + const UntiedTaskActionTy &Action; }; /// \brief API for inlined captured statement code generation in OpenMP @@ -210,6 +268,11 @@ public: llvm_unreachable("No helper name for inlined OpenMP construct"); } + void emitUntiedSwitch(CodeGenFunction &CGF) override { + if (OuterRegionInfo) + OuterRegionInfo->emitUntiedSwitch(CGF); + } + CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } static bool classof(const CGCapturedStmtInfo *Info) { @@ -217,6 +280,8 @@ public: cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; } + ~CGOpenMPInlinedRegionInfo() override = default; + private: /// \brief CodeGen info about outer OpenMP region. CodeGenFunction::CGCapturedStmtInfo *OldCSI; @@ -228,7 +293,7 @@ private: /// captured fields. The name of the target region has to be unique in a given /// application so it is provided by the client, because only the client has /// the information to generate that. -class CGOpenMPTargetRegionInfo : public CGOpenMPRegionInfo { +class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { public: CGOpenMPTargetRegionInfo(const CapturedStmt &CS, const RegionCodeGenTy &CodeGen, StringRef HelperName) @@ -257,7 +322,7 @@ static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { } /// \brief API for generation of expressions captured in a innermost OpenMP /// region. -class CGOpenMPInnerExprInfo : public CGOpenMPInlinedRegionInfo { +class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { public: CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, @@ -757,16 +822,36 @@ llvm::Value *CGOpenMPRuntime::emitParallelOrTeamsOutlinedFunction( llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + const VarDecl *PartIDVar, const VarDecl *TaskTVar, + OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, + bool Tied, unsigned &NumberOfParts) { + auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, + PrePostActionTy &) { + auto *ThreadID = getThreadID(CGF, D.getLocStart()); + auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart()); + llvm::Value *TaskArgs[] = { + UpLoc, ThreadID, + CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), + TaskTVar->getType()->castAs<PointerType>()) + .getPointer()}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); + }; + CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, + UntiedCodeGen); + CodeGen.setAction(Action); assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); CodeGenFunction CGF(CGM, true); - CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, - InnermostKind, - cast<OMPTaskDirective>(D).hasCancel()); + CGOpenMPTaskOutlinedRegionInfo CGInfo( + *CS, ThreadIDVar, CodeGen, InnermostKind, + cast<OMPTaskDirective>(D).hasCancel(), Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateCapturedStmtFunction(*CS); + auto *Res = CGF.GenerateCapturedStmtFunction(*CS); + CodeGen.clearAction(); + if (!Tied) + NumberOfParts = Action.getNumberOfParts(); + return Res; } Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) { @@ -1898,6 +1983,8 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, @@ -2951,7 +3038,7 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// argument. /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { -/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, +/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, /// tt->shareds); /// return 0; /// } @@ -2982,7 +3069,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, - // tt->task_data.shareds); + // tt, tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( @@ -2995,7 +3082,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); - auto *PartidParam = CGF.EmitLoadOfLValue(PartIdLVal, Loc).getScalarVal(); + auto *PartidParam = PartIdLVal.getPointer(); auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); @@ -3014,7 +3101,11 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, } llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, SharedsParam}; + TaskPrivatesMap, + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + TDBase.getAddress(), CGF.VoidPtrTy) + .getPointer(), + SharedsParam}; CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -3154,8 +3245,8 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, void CGOpenMPRuntime::emitTaskCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, - llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies, ArrayRef<const Expr *> FirstprivateVars, ArrayRef<const Expr *> FirstprivateCopies, @@ -3390,7 +3481,8 @@ void CGOpenMPRuntime::emitTaskCall( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), ArrayType::Normal, /*IndexTypeQuals=*/0); // kmp_depend_info[<Dependences.size()>] deps; - DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy); + DependenciesArray = + CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); for (unsigned i = 0; i < NumDependencies; ++i) { const Expr *E = Dependences[i].second; auto Addr = CGF.EmitLValue(E); @@ -3448,8 +3540,6 @@ void CGOpenMPRuntime::emitTaskCall( // NOTE: routine and part_id fields are intialized by __kmpc_omp_task_alloc() // libcall. - // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t - // *new_task); // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence @@ -3467,18 +3557,25 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [NumDependencies, &TaskArgs, + auto &&ThenCodeGen = [this, Tied, Loc, NumberOfParts, TDBase, KmpTaskTQTyRD, + NumDependencies, &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { - // TODO: add check for untied tasks. - auto &RT = CGF.CGM.getOpenMPRuntime(); + if (!Tied) { + auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); + auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); + } if (NumDependencies) { CGF.EmitRuntimeCall( - RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), - DepTaskArgs); + createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs); } else { - CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task), + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } + // Check if parent region is untied and build return for untied task; + if (auto *Region = + dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); }; llvm::Value *DepWaitTaskArgs[6]; @@ -4039,6 +4136,8 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; // Ignore return result until untied tasks are supported. CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args); + if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); } void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, |