diff options
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 696 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 47 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 30 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 57 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 239 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 7 |
6 files changed, 582 insertions, 494 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 810a2db3a5b..dedffc2db5a 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -252,7 +252,7 @@ private: StringRef HelperName; }; -static void EmptyCodeGen(CodeGenFunction &) { +static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { llvm_unreachable("No codegen for expressions"); } /// \brief API for generation of expressions captured in a innermost OpenMP @@ -564,8 +564,33 @@ enum OpenMPRTLFunction { OMPRTL__tgt_unregister_lib, }; +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class CleanupTy final : public EHScopeStack::Cleanup { + PrePostActionTy *Action; + +public: + explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} + void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { + if (!CGF.HaveInsertPoint()) + return; + Action->Exit(CGF); + } +}; + } // anonymous namespace +void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { + CodeGenFunction::RunCleanupsScope Scope(CGF); + if (PrePostAction) { + CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); + Callback(CodeGen, CGF, *PrePostAction); + } else { + PrePostActionTy Action; + Callback(CodeGen, CGF, Action); + } +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { return CGF.EmitLoadOfPointerLValue( CGF.GetAddrOfLocalVar(getThreadIDVariable()), @@ -581,10 +606,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. CGF.EHStack.pushTerminate(); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CodeGen(CGF); - } + CodeGen(CGF); CGF.EHStack.popTerminate(); } @@ -601,10 +623,6 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */, CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */, CGM.Int8PtrTy /* psource */, nullptr); - // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) - llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), - llvm::PointerType::getUnqual(CGM.Int32Ty)}; - Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); loadOffloadInfoMetadata(); @@ -896,10 +914,18 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { } llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { + if (!IdentTy) { + } return llvm::PointerType::getUnqual(IdentTy); } llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { + if (!Kmpc_MicroTy) { + // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) + llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), + llvm::PointerType::getUnqual(CGM.Int32Ty)}; + Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); + } return llvm::PointerType::getUnqual(Kmpc_MicroTy); } @@ -1644,12 +1670,10 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // the condition and the dead arm of the if/else. bool CondConstant; if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (CondConstant) { + if (CondConstant) ThenGen(CGF); - } else { + else ElseGen(CGF); - } return; } @@ -1662,26 +1686,16 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, // Emit the 'then' code. CGF.EmitBlock(ThenBlock); - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ThenGen(CGF); - } + ThenGen(CGF); CGF.EmitBranch(ContBlock); // Emit the 'else' code if present. - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(ElseBlock); - } - { - CodeGenFunction::RunCleanupsScope ThenScope(CGF); - ElseGen(CGF); - } - { - // There is no need to emit line number for unconditional branch. - auto NL = ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBranch(ContBlock); - } + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBlock(ElseBlock); + ElseGen(CGF); + // There is no need to emit line number for unconditional branch. + (void)ApplyDebugLocation::CreateEmpty(CGF); + CGF.EmitBranch(ContBlock); // Emit the continuation block for code after the if. CGF.EmitBlock(ContBlock, /*IsFinished=*/true); } @@ -1693,34 +1707,36 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, if (!CGF.HaveInsertPoint()) return; auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = [this, OutlinedFn, CapturedVars, - RTLoc](CodeGenFunction &CGF) { + RegionCodeGenTy ThenGen = [OutlinedFn, CapturedVars, + RTLoc](CodeGenFunction &CGF, PrePostActionTy &) { // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { RTLoc, CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars - CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; llvm::SmallVector<llvm::Value *, 16> RealArgs; RealArgs.append(std::begin(Args), std::end(Args)); RealArgs.append(CapturedVars.begin(), CapturedVars.end()); - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); + auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call); CGF.EmitRuntimeCall(RTLFn, RealArgs); }; - auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, - Loc](CodeGenFunction &CGF) { - auto ThreadID = getThreadID(CGF, Loc); + RegionCodeGenTy ElseGen = [OutlinedFn, CapturedVars, RTLoc, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + auto ThreadID = RT.getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); llvm::Value *Args[] = {RTLoc, ThreadID}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), - Args); + CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args); // OutlinedFn(>id, &zero, CapturedStruct); - auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc); + auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); Address ZeroAddr = - CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), - /*Name*/ ".zero.addr"); + CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), + /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); @@ -1729,16 +1745,15 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); - llvm::Value *EndArgs[] = {emitUpdateLocation(CGF, Loc), ThreadID}; + llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), EndArgs); + RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel), + EndArgs); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + else ThenGen(CGF); - } } // If we're inside an (outlined) parallel region, use the region info's @@ -1790,21 +1805,39 @@ llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { } namespace { -template <size_t N> class CallEndCleanup final : public EHScopeStack::Cleanup { - llvm::Value *Callee; - llvm::Value *Args[N]; +/// Common pre(post)-action for different OpenMP constructs. +class CommonActionTy final : public PrePostActionTy { + llvm::Value *EnterCallee; + ArrayRef<llvm::Value *> EnterArgs; + llvm::Value *ExitCallee; + ArrayRef<llvm::Value *> ExitArgs; + bool Conditional; + llvm::BasicBlock *ContBlock = nullptr; public: - CallEndCleanup(llvm::Value *Callee, ArrayRef<llvm::Value *> CleanupArgs) - : Callee(Callee) { - assert(CleanupArgs.size() == N); - std::copy(CleanupArgs.begin(), CleanupArgs.end(), std::begin(Args)); + CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs, + llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs, + bool Conditional = false) + : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), + ExitArgs(ExitArgs), Conditional(Conditional) {} + void Enter(CodeGenFunction &CGF) override { + llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); + if (Conditional) { + llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); + auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); + ContBlock = CGF.createBasicBlock("omp_if.end"); + // Generate the branch (If-stmt) + CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); + CGF.EmitBlock(ThenBlock); + } } - - void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { - if (!CGF.HaveInsertPoint()) - return; - CGF.EmitRuntimeCall(Callee, Args); + void Done(CodeGenFunction &CGF) { + // Emit the rest of blocks/branches + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + void Exit(CodeGenFunction &CGF) override { + CGF.EmitRuntimeCall(ExitCallee, ExitArgs); } }; } // anonymous namespace @@ -1819,45 +1852,22 @@ void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, // Prepare arguments and build a call to __kmpc_critical if (!CGF.HaveInsertPoint()) return; - CodeGenFunction::RunCleanupsScope Scope(CGF); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), getCriticalRegionLock(CriticalName)}; + llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), + std::end(Args)); if (Hint) { - llvm::SmallVector<llvm::Value *, 8> ArgsWithHint(std::begin(Args), - std::end(Args)); - auto *HintVal = CGF.EmitScalarExpr(Hint); - ArgsWithHint.push_back( - CGF.Builder.CreateIntCast(HintVal, CGM.IntPtrTy, /*isSigned=*/false)); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical_with_hint), - ArgsWithHint); - } else - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_critical), Args); - // Build a call to __kmpc_end_critical - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_critical), - llvm::makeArrayRef(Args)); + EnterArgs.push_back(CGF.Builder.CreateIntCast( + CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false)); + } + CommonActionTy Action( + createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint + : OMPRTL__kmpc_critical), + EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args); + CriticalOpGen.setAction(Action); emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); } -static void emitIfStmt(CodeGenFunction &CGF, llvm::Value *IfCond, - OpenMPDirectiveKind Kind, SourceLocation Loc, - const RegionCodeGenTy &BodyOpGen) { - llvm::Value *CallBool = CGF.EmitScalarConversion( - IfCond, - CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true), - CGF.getContext().BoolTy, Loc); - - auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); - auto *ContBlock = CGF.createBasicBlock("omp_if.end"); - // Generate the branch (If-stmt) - CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); - CGF.EmitBlock(ThenBlock); - CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, Kind, BodyOpGen); - // Emit the rest of bblocks/branches - CGF.EmitBranch(ContBlock); - CGF.EmitBlock(ContBlock, true); -} - void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) { @@ -1869,18 +1879,12 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, // } // Prepare arguments and build a call to __kmpc_master llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsMaster = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_master), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - MasterCallEndCleanup; - emitIfStmt( - CGF, IsMaster, OMPD_master, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<MasterCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_master), - llvm::makeArrayRef(Args)); - MasterOpGen(CGF); - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args, + createRuntimeFunction(OMPRTL__kmpc_end_master), Args, + /*Conditional=*/true); + MasterOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_master, MasterOpGen); + Action.Done(CGF); } void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, @@ -1903,16 +1907,12 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, // TaskgroupOpGen(); // __kmpc_end_taskgroup(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_taskgroup - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args); - // Build a call to __kmpc_end_taskgroup - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), - llvm::makeArrayRef(Args)); - emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); - } + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args, + createRuntimeFunction(OMPRTL__kmpc_end_taskgroup), + Args); + TaskgroupOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); } /// Given an array of pointers to variables, project the address of a @@ -2008,22 +2008,16 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, } // Prepare arguments and build a call to __kmpc_single llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - auto *IsSingle = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_single), Args); - typedef CallEndCleanup<std::extent<decltype(Args)>::value> - SingleCallEndCleanup; - emitIfStmt( - CGF, IsSingle, OMPD_single, Loc, [&](CodeGenFunction &CGF) -> void { - CodeGenFunction::RunCleanupsScope Scope(CGF); - CGF.EHStack.pushCleanup<SingleCallEndCleanup>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single), - llvm::makeArrayRef(Args)); - SingleOpGen(CGF); - if (DidIt.isValid()) { - // did_it = 1; - CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); - } - }); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args, + createRuntimeFunction(OMPRTL__kmpc_end_single), Args, + /*Conditional=*/true); + SingleOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_single, SingleOpGen); + if (DidIt.isValid()) { + // did_it = 1; + CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); + } + Action.Done(CGF); // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, // <copy_func>, did_it); if (DidIt.isValid()) { @@ -2073,14 +2067,14 @@ void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, // OrderedOpGen(); // __kmpc_end_ordered(ident_t *, gtid); // Prepare arguments and build a call to __kmpc_ordered - CodeGenFunction::RunCleanupsScope Scope(CGF); if (IsThreads) { llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_ordered), Args); - // Build a call to __kmpc_end_ordered - CGF.EHStack.pushCleanup<CallEndCleanup<std::extent<decltype(Args)>::value>>( - NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_ordered), - llvm::makeArrayRef(Args)); + CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args, + createRuntimeFunction(OMPRTL__kmpc_end_ordered), + Args); + OrderedOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); + return; } emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); } @@ -2596,12 +2590,14 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { IdentInfo, C.CharTy); auto *UnRegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_unreg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_unreg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib), Desc); }); auto *RegFn = createOffloadingBinaryDescriptorFunction( - CGM, ".omp_offloading.descriptor_reg", [&](CodeGenFunction &CGF) { + CGM, ".omp_offloading.descriptor_reg", + [&](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib), Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); @@ -3469,19 +3465,19 @@ void CGOpenMPRuntime::emitTaskCall( DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ThenCodeGen = [this, NumDependencies, - &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) { - // TODO: add check for untied tasks. + RegionCodeGenTy ThenCodeGen = [NumDependencies, &TaskArgs, &DepTaskArgs]( + CodeGenFunction &CGF, PrePostActionTy &) { + // TODO: add check for untied tasks. + auto &RT = CGF.CGM.getOpenMPRuntime(); if (NumDependencies) { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), - DepTaskArgs); + CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), + DepTaskArgs); } else { - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs); } }; - typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value> - IfCallEndCleanup; llvm::Value *DepWaitTaskArgs[6]; if (NumDependencies) { @@ -3492,38 +3488,41 @@ void CGOpenMPRuntime::emitTaskCall( DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry, - NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) { + RegionCodeGenTy ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, + TaskEntry, NumDependencies, &DepWaitTaskArgs]( + CodeGenFunction &CGF, PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); CodeGenFunction::RunCleanupsScope LocalScope(CGF); // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. if (NumDependencies) - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), + CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps), DepWaitTaskArgs); + // Call proxy_task_entry(gtid, new_task); + RegionCodeGenTy CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy]( + CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; + CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + }; + // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), - TaskArgs); // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, // kmp_task_t *new_task); - CGF.EHStack.pushCleanup<IfCallEndCleanup>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), - llvm::makeArrayRef(TaskArgs)); - - // Call proxy_task_entry(gtid, new_task); - llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs); + CommonActionTy Action( + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs, + RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs); + CodeGen.setAction(Action); + CodeGen(CGF); }; - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + else ThenCodeGen(CGF); - } } /// \brief Emit reduction operation for each element of array (required for @@ -3714,6 +3713,25 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM, return Fn; } +static void emitSingleReductionCombiner(CodeGenFunction &CGF, + const Expr *ReductionOp, + const Expr *PrivateRef, + const DeclRefExpr *LHS, + const DeclRefExpr *RHS) { + if (PrivateRef->getType()->isArrayType()) { + // Emit reduction for array section. + auto *LHSVar = cast<VarDecl>(LHS->getDecl()); + auto *RHSVar = cast<VarDecl>(RHS->getDecl()); + EmitOMPAggregateReduction( + CGF, PrivateRef->getType(), LHSVar, RHSVar, + [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { + emitReductionCombiner(CGF, ReductionOp); + }); + } else + // Emit reduction for array subscript or single variable. + emitReductionCombiner(CGF, ReductionOp); +} + void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, @@ -3765,15 +3783,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { emitReductionCombiner(CGF, E); }); - } else - emitReductionCombiner(CGF, E); + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); ++IPriv; ++ILHS; ++IRHS; @@ -3863,40 +3874,32 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); CGF.EmitBlock(Case1BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait - : OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); + // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + RegionCodeGenTy CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps]( + CodeGenFunction &CGF, PrePostActionTy &Action) { auto IPriv = Privates.begin(); auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); for (auto *E : ReductionOps) { - if ((*IPriv)->getType()->isArrayType()) { - // Emit reduction for array section. - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction( - CGF, (*IPriv)->getType(), LHSVar, RHSVar, - [=](CodeGenFunction &CGF, const Expr *, const Expr *, - const Expr *) { emitReductionCombiner(CGF, E); }); - } else - // Emit reduction for array subscript or single variable. - emitReductionCombiner(CGF, E); + emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), + cast<DeclRefExpr>(*IRHS)); ++IPriv; ++ILHS; ++IRHS; } - } + }; + CommonActionTy Action( + nullptr, llvm::None, + createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait + : OMPRTL__kmpc_end_reduce), + EndArgs); + CodeGen.setAction(Action); + CodeGen(CGF); CGF.EmitBranch(DefaultBB); @@ -3909,106 +3912,112 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); CGF.EmitBlock(Case2BB); - { - CodeGenFunction::RunCleanupsScope Scope(CGF); - if (!WithNowait) { - // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); - llvm::Value *EndArgs[] = { - IdentTLoc, // ident_t *<loc> - ThreadId, // i32 <gtid> - Lock // kmp_critical_name *&<lock> - }; - CGF.EHStack - .pushCleanup<CallEndCleanup<std::extent<decltype(EndArgs)>::value>>( - NormalAndEHCleanup, - createRuntimeFunction(OMPRTL__kmpc_end_reduce), - llvm::makeArrayRef(EndArgs)); - } + RegionCodeGenTy AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, + &ReductionOps](CodeGenFunction &CGF, + PrePostActionTy &Action) { auto ILHS = LHSExprs.begin(); auto IRHS = RHSExprs.begin(); auto IPriv = Privates.begin(); for (auto *E : ReductionOps) { - const Expr *XExpr = nullptr; - const Expr *EExpr = nullptr; - const Expr *UpExpr = nullptr; - BinaryOperatorKind BO = BO_Comma; - if (auto *BO = dyn_cast<BinaryOperator>(E)) { - if (BO->getOpcode() == BO_Assign) { - XExpr = BO->getLHS(); - UpExpr = BO->getRHS(); - } + const Expr *XExpr = nullptr; + const Expr *EExpr = nullptr; + const Expr *UpExpr = nullptr; + BinaryOperatorKind BO = BO_Comma; + if (auto *BO = dyn_cast<BinaryOperator>(E)) { + if (BO->getOpcode() == BO_Assign) { + XExpr = BO->getLHS(); + UpExpr = BO->getRHS(); } - // Try to emit update expression as a simple atomic. - auto *RHSExpr = UpExpr; - if (RHSExpr) { - // Analyze RHS part of the whole expression. - if (auto *ACO = dyn_cast<AbstractConditionalOperator>( - RHSExpr->IgnoreParenImpCasts())) { - // If this is a conditional operator, analyze its condition for - // min/max reduction operator. - RHSExpr = ACO->getCond(); - } - if (auto *BORHS = - dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { - EExpr = BORHS->getRHS(); - BO = BORHS->getOpcode(); - } + } + // Try to emit update expression as a simple atomic. + auto *RHSExpr = UpExpr; + if (RHSExpr) { + // Analyze RHS part of the whole expression. + if (auto *ACO = dyn_cast<AbstractConditionalOperator>( + RHSExpr->IgnoreParenImpCasts())) { + // If this is a conditional operator, analyze its condition for + // min/max reduction operator. + RHSExpr = ACO->getCond(); } - if (XExpr) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto &&AtomicRedGen = [this, BO, VD, IPriv, - Loc](CodeGenFunction &CGF, const Expr *XExpr, - const Expr *EExpr, const Expr *UpExpr) { - LValue X = CGF.EmitLValue(XExpr); - RValue E; - if (EExpr) - E = CGF.EmitAnyExpr(EExpr); - CGF.EmitOMPAtomicSimpleUpdateExpr( - X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() -> Address { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.emitOMPSimpleStore( - CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, - VD->getType().getNonReferenceType(), Loc); - return LHSTemp; - }); - (void)PrivateScope.Privatize(); - return CGF.EmitAnyExpr(UpExpr); - }); - }; - if ((*IPriv)->getType()->isArrayType()) { - // Emit atomic reduction for array section. - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, - AtomicRedGen, XExpr, EExpr, UpExpr); - } else - // Emit atomic reduction for array subscript or single variable. - AtomicRedGen(CGF, XExpr, EExpr, UpExpr); - } else { - // Emit as a critical region. - auto &&CritRedGen = [this, E, Loc](CodeGenFunction &CGF, const Expr *, - const Expr *, const Expr *) { - emitCriticalRegion( - CGF, ".atomic_reduction", - [=](CodeGenFunction &CGF) { emitReductionCombiner(CGF, E); }, - Loc); - }; - if ((*IPriv)->getType()->isArrayType()) { - auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, - CritRedGen); - } else - CritRedGen(CGF, nullptr, nullptr, nullptr); + if (auto *BORHS = + dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { + EExpr = BORHS->getRHS(); + BO = BORHS->getOpcode(); } + } + if (XExpr) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto &&AtomicRedGen = [BO, VD, IPriv, + Loc](CodeGenFunction &CGF, const Expr *XExpr, + const Expr *EExpr, const Expr *UpExpr) { + LValue X = CGF.EmitLValue(XExpr); + RValue E; + if (EExpr) + E = CGF.EmitAnyExpr(EExpr); + CGF.EmitOMPAtomicSimpleUpdateExpr( + X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, + [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { + CodeGenFunction::OMPPrivateScope PrivateScope(CGF); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue, Loc]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + return LHSTemp; + }); + (void)PrivateScope.Privatize(); + return CGF.EmitAnyExpr(UpExpr); + }); + }; + if ((*IPriv)->getType()->isArrayType()) { + // Emit atomic reduction for array section. + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, + AtomicRedGen, XExpr, EExpr, UpExpr); + } else + // Emit atomic reduction for array subscript or single variable. + AtomicRedGen(CGF, XExpr, EExpr, UpExpr); + } else { + // Emit as a critical region. + auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, + const Expr *, const Expr *) { + auto &RT = CGF.CGM.getOpenMPRuntime(); + RT.emitCriticalRegion( + CGF, ".atomic_reduction", + [=](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + emitReductionCombiner(CGF, E); + }, + Loc); + }; + if ((*IPriv)->getType()->isArrayType()) { + auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, + CritRedGen); + } else + CritRedGen(CGF, nullptr, nullptr, nullptr); + } ++ILHS; ++IRHS; ++IPriv; } - } + }; + if (!WithNowait) { + // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); + llvm::Value *EndArgs[] = { + IdentTLoc, // ident_t *<loc> + ThreadId, // i32 <gtid> + Lock // kmp_critical_name *&<lock> + }; + CommonActionTy Action(nullptr, llvm::None, + createRuntimeFunction(OMPRTL__kmpc_end_reduce), + EndArgs); + AtomicCodeGen.setAction(Action); + AtomicCodeGen(CGF); + } else + AtomicCodeGen(CGF); CGF.EmitBranch(DefaultBB); CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); @@ -4105,14 +4114,15 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 cncl_kind); if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { - auto &&ThenGen = [this, Loc, CancelRegion, - OMPRegionInfo](CodeGenFunction &CGF) { + RegionCodeGenTy ThenGen = [Loc, CancelRegion, OMPRegionInfo]( + CodeGenFunction &CGF, PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc), CGF.Builder.getInt32(getCancellationKind(CancelRegion))}; // Ignore return result until untied tasks are supported. - auto *Result = - CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_cancel), Args); + auto *Result = CGF.EmitRuntimeCall( + RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args); // if (__kmpc_cancel()) { // __kmpc_cancel_barrier(); // exit from construct; @@ -4123,7 +4133,7 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); // __kmpc_cancel_barrier(); - emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); + RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; auto CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); @@ -4131,7 +4141,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.EmitBlock(ContBB, /*IsFinished=*/true); }; if (IfCond) - emitOMPIfClause(CGF, IfCond, ThenGen, [](CodeGenFunction &) {}); + emitOMPIfClause(CGF, IfCond, ThenGen, + [](CodeGenFunction &, PrePostActionTy &) {}); else ThenGen(CGF); } @@ -4167,21 +4178,9 @@ static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { assert(!ParentName.empty() && "Invalid target region parent name!"); - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - - // Emit target region as a standalone region. - auto &&CodeGen = [&CS, &D](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(D, PrivateScope); - CGF.EmitOMPPrivateClause(D, PrivateScope); - (void)PrivateScope.Privatize(); - - CGF.EmitStmt(CS.getCapturedStmt()); - }; - emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); } @@ -4471,9 +4470,11 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, OffloadError); // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, &Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, - hasVLACaptures, Device, OutlinedFnID, OffloadError, - OffloadErrorQType, &D](CodeGenFunction &CGF) { + RegionCodeGenTy ThenGen = [&Ctx, &BasePointers, &Pointers, &Sizes, &MapTypes, + hasVLACaptures, Device, OutlinedFnID, OffloadError, + OffloadErrorQType, + &D](CodeGenFunction &CGF, PrePostActionTy &) { + auto &RT = CGF.CGM.getOpenMPRuntime(); unsigned PointerNumVal = BasePointers.size(); llvm::Value *PointerNum = CGF.Builder.getInt32(PointerNumVal); llvm::Value *BasePointersArray; @@ -4509,9 +4510,10 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, ConstSizes.push_back(cast<llvm::Constant>(S)); auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes); + llvm::ArrayType::get(CGF.CGM.SizeTy, ConstSizes.size()), + ConstSizes); auto *SizesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), SizesArrayInit->getType(), + CGF.CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, SizesArrayInit, ".offload_sizes"); SizesArrayGbl->setUnnamedAddr(true); @@ -4523,7 +4525,7 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Constant *MapTypesArrayInit = llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes); auto *MapTypesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), MapTypesArrayInit->getType(), + CGF.CGM.getModule(), MapTypesArrayInit->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapTypesArrayInit, ".offload_maptypes"); MapTypesArrayGbl->setUnnamedAddr(true); @@ -4532,65 +4534,65 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, for (unsigned i = 0; i < PointerNumVal; ++i) { llvm::Value *BPVal = BasePointers[i]; if (BPVal->getType()->isPointerTy()) - BPVal = CGF.Builder.CreateBitCast(BPVal, CGM.VoidPtrTy); + BPVal = CGF.Builder.CreateBitCast(BPVal, CGF.VoidPtrTy); else { assert(BPVal->getType()->isIntegerTy() && "If not a pointer, the value type must be an integer."); - BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGM.VoidPtrTy); + BPVal = CGF.Builder.CreateIntToPtr(BPVal, CGF.VoidPtrTy); } llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), + llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), BasePointersArray, 0, i); Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(BPVal, BPAddr); llvm::Value *PVal = Pointers[i]; if (PVal->getType()->isPointerTy()) - PVal = CGF.Builder.CreateBitCast(PVal, CGM.VoidPtrTy); + PVal = CGF.Builder.CreateBitCast(PVal, CGF.VoidPtrTy); else { assert(PVal->getType()->isIntegerTy() && "If not a pointer, the value type must be an integer."); - PVal = CGF.Builder.CreateIntToPtr(PVal, CGM.VoidPtrTy); + PVal = CGF.Builder.CreateIntToPtr(PVal, CGF.VoidPtrTy); } llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, + llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray, 0, i); Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); CGF.Builder.CreateStore(PVal, PAddr); if (hasVLACaptures) { llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, + llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray, /*Idx0=*/0, /*Idx1=*/i); Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType())); CGF.Builder.CreateStore(CGF.Builder.CreateIntCast( - Sizes[i], CGM.SizeTy, /*isSigned=*/true), + Sizes[i], CGF.SizeTy, /*isSigned=*/true), SAddr); } } BasePointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), BasePointersArray, + llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), BasePointersArray, /*Idx0=*/0, /*Idx1=*/0); PointersArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, PointerNumVal), PointersArray, + llvm::ArrayType::get(CGF.VoidPtrTy, PointerNumVal), PointersArray, /*Idx0=*/0, /*Idx1=*/0); SizesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.SizeTy, PointerNumVal), SizesArray, + llvm::ArrayType::get(CGF.SizeTy, PointerNumVal), SizesArray, /*Idx0=*/0, /*Idx1=*/0); MapTypesArray = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int32Ty, PointerNumVal), MapTypesArray, + llvm::ArrayType::get(CGF.Int32Ty, PointerNumVal), MapTypesArray, /*Idx0=*/0, /*Idx1=*/0); } else { - BasePointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - PointersArray = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - SizesArray = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo()); + BasePointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy); + PointersArray = llvm::ConstantPointerNull::get(CGF.VoidPtrPtrTy); + SizesArray = llvm::ConstantPointerNull::get(CGF.SizeTy->getPointerTo()); MapTypesArray = - llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()); + llvm::ConstantPointerNull::get(CGF.Int32Ty->getPointerTo()); } // On top of the arrays that were filled up, the target offloading call @@ -4609,15 +4611,15 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, llvm::Value *DeviceID; if (Device) DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGM.Int32Ty, /*isSigned=*/true); + CGF.Int32Ty, /*isSigned=*/true); else DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF); // Return value of the runtime offloading call. llvm::Value *Return; - auto *NumTeams = emitNumTeamsClauseForTargetDirective(*this, CGF, D); - auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(*this, CGF, D); + auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); + auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); // If we have NumTeams defined this means that we have an enclosed teams // region. Therefore we also expect to have ThreadLimit defined. These two @@ -4633,12 +4635,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, BasePointersArray, PointersArray, SizesArray, MapTypesArray, NumTeams, ThreadLimit}; Return = CGF.EmitRuntimeCall( - createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); + RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); } else { llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, BasePointersArray, PointersArray, SizesArray, MapTypesArray}; - Return = CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_target), + Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target), OffloadingArgs); } @@ -4646,9 +4648,9 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, }; // Notify that the host version must be executed. - auto &&ElseGen = [this, OffloadError, - OffloadErrorQType](CodeGenFunction &CGF) { - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/-1u), + RegionCodeGenTy ElseGen = [OffloadError](CodeGenFunction &CGF, + PrePostActionTy &) { + CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u), OffloadError); }; @@ -4657,16 +4659,12 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // regardless of the conditional in the if clause if, e.g., the user do not // specify target triples. if (OutlinedFnID) { - if (IfCond) { + if (IfCond) emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen); - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + else ThenGen(CGF); - } - } else { - CodeGenFunction::RunCleanupsScope Scope(CGF); + } else ElseGen(CGF); - } // Check the error code and execute the host version if required. auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed"); @@ -4708,8 +4706,10 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, llvm::Function *Fn; llvm::Constant *Addr; - emitTargetOutlinedFunction(*E, ParentName, Fn, Addr, - /*isOffloadEntry=*/true); + std::tie(Fn, Addr) = + CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CGM, cast<OMPTargetDirective>(*E), ParentName, + /*isOffloadEntry=*/true); assert(Fn && Addr && "Target region emission failed."); return; } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 1935e93e206..d2a1a243a8c 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -46,7 +46,44 @@ class Address; class CodeGenFunction; class CodeGenModule; -typedef llvm::function_ref<void(CodeGenFunction &)> RegionCodeGenTy; +/// A basic class for pre|post-action for advanced codegen sequence for OpenMP +/// region. +class PrePostActionTy { +public: + explicit PrePostActionTy() {} + virtual void Enter(CodeGenFunction &CGF) {} + virtual void Exit(CodeGenFunction &CGF) {} + virtual ~PrePostActionTy() {} +}; + +/// Class provides a way to call simple version of codegen for OpenMP region, or +/// an advanced with possible pre|post-actions in codegen. +class RegionCodeGenTy final { + intptr_t CodeGen; + typedef void (*CodeGenTy)(intptr_t, CodeGenFunction &, PrePostActionTy &); + CodeGenTy Callback; + mutable PrePostActionTy *PrePostAction; + RegionCodeGenTy() = delete; + RegionCodeGenTy &operator=(const RegionCodeGenTy &) = delete; + template <typename Callable> + static void CallbackFn(intptr_t CodeGen, CodeGenFunction &CGF, + PrePostActionTy &Action) { + return (*reinterpret_cast<Callable *>(CodeGen))(CGF, Action); + } + +public: + template <typename Callable> + RegionCodeGenTy( + Callable &&CodeGen, + typename std::enable_if< + !std::is_same<typename std::remove_reference<Callable>::type, + RegionCodeGenTy>::value>::type * = nullptr) + : CodeGen(reinterpret_cast<intptr_t>(&CodeGen)), + Callback(CallbackFn<typename std::remove_reference<Callable>::type>), + PrePostAction(nullptr) {} + void setAction(PrePostActionTy &Action) const { PrePostAction = &Action; } + void operator()(CodeGenFunction &CGF) const; +}; class CGOpenMPRuntime { protected: @@ -82,14 +119,14 @@ private: OpenMPDefaultLocMapTy OpenMPDefaultLocMap; Address getOrCreateDefaultLocation(unsigned Flags); - llvm::StructType *IdentTy; + llvm::StructType *IdentTy = nullptr; /// \brief Map for SourceLocation and OpenMP runtime library debug locations. typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy; OpenMPDebugLocMapTy OpenMPDebugLocMap; /// \brief The type for a microtask which gets passed to __kmpc_fork_call(). /// Original representation is: /// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...); - llvm::FunctionType *Kmpc_MicroTy; + llvm::FunctionType *Kmpc_MicroTy = nullptr; /// \brief Stores debug location and ThreadID for the function. struct DebugLocThreadIdTy { llvm::Value *DebugLoc; @@ -810,13 +847,15 @@ public: /// \param OutlinedFn Outlined function value to be defined by this call. /// \param OutlinedFnID Outlined function ID value to be defined by this call. /// \param IsOffloadEntry True if the outlined function is an offload entry. + /// \param CodeGen Code generation sequence for the \a D directive. /// An oulined function may not be an entry if, e.g. the if clause always /// evaluates to false. virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry); + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen); /// \brief Emit the target offloading code associated with \a D. The emitted /// code attempts offloading the execution to the device, an the event of diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 3a634ac3087..3b2b30cdb73 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -305,28 +305,32 @@ void CGOpenMPRuntimeNVPTX::createOffloadEntry(llvm::Constant *ID, void CGOpenMPRuntimeNVPTX::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) { + bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { if (!IsOffloadEntry) // Nothing to do. return; assert(!ParentName.empty() && "Invalid target region parent name!"); - const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt()); - EntryFunctionState EST; WorkerFunctionState WST(CGM); // Emit target region as a standalone region. - auto &&CodeGen = [&EST, &WST, &CS, &D, this](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(D, PrivateScope); - CGF.EmitOMPPrivateClause(D, PrivateScope); - (void)PrivateScope.Privatize(); - - emitEntryHeader(CGF, EST, WST); - CGF.EmitStmt(CS.getCapturedStmt()); - emitEntryFooter(CGF, EST); - }; + class NVPTXPrePostActionTy : public PrePostActionTy { + CGOpenMPRuntimeNVPTX &RT; + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST; + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST; + + public: + NVPTXPrePostActionTy(CGOpenMPRuntimeNVPTX &RT, + CGOpenMPRuntimeNVPTX::EntryFunctionState &EST, + CGOpenMPRuntimeNVPTX::WorkerFunctionState &WST) + : RT(RT), EST(EST), WST(WST) {} + void Enter(CodeGenFunction &CGF) override { + RT.emitEntryHeader(CGF, EST, WST); + } + void Exit(CodeGenFunction &CGF) override { RT.emitEntryFooter(CGF, EST); } + } Action(*this, EST, WST); + CodeGen.setAction(Action); emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index b52bae04e0e..6ef3d3c08bd 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -24,6 +24,34 @@ namespace clang { namespace CodeGen { class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { +public: + class EntryFunctionState { + public: + llvm::BasicBlock *ExitBB; + + EntryFunctionState() : ExitBB(nullptr){}; + }; + + class WorkerFunctionState { + public: + llvm::Function *WorkerFn; + const CGFunctionInfo *CGFI; + + WorkerFunctionState(CodeGenModule &CGM); + + private: + void createWorkerFunction(CodeGenModule &CGM); + }; + + /// \brief Helper for target entry function. Guide the master and worker + /// threads to their respective locations. + void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, + WorkerFunctionState &WST); + + /// \brief Signal termination of OMP execution. + void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); + +private: // // NVPTX calls. // @@ -66,24 +94,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { // Outlined function for the workers to execute. llvm::GlobalVariable *WorkID; - class EntryFunctionState { - public: - llvm::BasicBlock *ExitBB; - - EntryFunctionState() : ExitBB(nullptr){}; - }; - - class WorkerFunctionState { - public: - llvm::Function *WorkerFn; - const CGFunctionInfo *CGFI; - - WorkerFunctionState(CodeGenModule &CGM); - - private: - void createWorkerFunction(CodeGenModule &CGM); - }; - /// \brief Initialize master-worker control state. void initializeEnvironment(); @@ -93,14 +103,6 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { /// \brief Helper for worker function. Emit body of worker loop. void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); - /// \brief Helper for target entry function. Guide the master and worker - /// threads to their respective locations. - void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, - WorkerFunctionState &WST); - - /// \brief Signal termination of OMP execution. - void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); - /// \brief Returns specified OpenMP runtime function for the current OpenMP /// implementation. Specialized for the NVPTX device. /// \param Function OpenMP runtime function. @@ -129,7 +131,8 @@ class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, - bool IsOffloadEntry) override; + bool IsOffloadEntry, + const RegionCodeGenTy &CodeGen) override; public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 30385cd8b92..87c9b3b4d1d 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -26,8 +26,7 @@ using namespace CodeGen; namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. -class OMPLexicalScope { - CodeGenFunction::LexicalScope Scope; +class OMPLexicalScope : public CodeGenFunction::LexicalScope { void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { for (const auto *C : S.clauses()) { if (auto *CPI = OMPClauseWithPreInit::get(C)) { @@ -48,10 +47,11 @@ class OMPLexicalScope { public: OMPLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) - : Scope(CGF, S.getSourceRange()) { + : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()) { emitPreInitStmt(CGF, S); } }; + } // namespace llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { @@ -1097,8 +1097,6 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). emitParallelOrTeamsOutlinedFunction(S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -1110,7 +1108,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, CGF, NumThreads, NumThreadsClause->getLocStart()); } if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { - CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); CGF.CGM.getOpenMPRuntime().emitProcBindClause( CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getLocStart()); } @@ -1122,14 +1120,17 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, break; } } + + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { - OMPLexicalScope Scope(*this, S); // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPPrivateScope PrivateScope(CGF); bool Copyins = CGF.EmitOMPCopyinClause(S); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); @@ -1465,7 +1466,7 @@ void CodeGenFunction::EmitOMPSimdFinal( } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { // if (PreCond) { // for (IV in 0..LastIteration) BODY; // <Final counter/linear vars updates>; @@ -1508,7 +1509,6 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitAlignedClause(CGF, S); CGF.EmitOMPLinearClauseInit(S); - bool HasLastprivateClause; { OMPPrivateScope LoopScope(CGF); emitPrivateLoopCounters(CGF, LoopScope, S.counters(), @@ -1516,7 +1516,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { emitPrivateLinearVars(CGF, S, LoopScope); CGF.EmitOMPPrivateClause(S, LoopScope); CGF.EmitOMPReductionClauseInit(S, LoopScope); - HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); + bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); (void)LoopScope.Privatize(); CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), @@ -1526,9 +1526,8 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { }, [](CodeGenFunction &) {}); // Emit final copy of the lastprivate variables at the end of loops. - if (HasLastprivateClause) { + if (HasLastprivateClause) CGF.EmitOMPLastprivateClauseFinal(S); - } CGF.EmitOMPReductionClauseFinal(S); emitPostUpdateForReductionClause( CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; }); @@ -1543,6 +1542,7 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { CGF.EmitBlock(ContBlock, true); } }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -1928,11 +1928,12 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) { void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + }; { OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); - }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); } @@ -1945,11 +1946,12 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; + auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { + HasLastprivates = CGF.EmitOMPWorksharingLoop(S); + }; { OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S); - }; CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -1972,7 +1974,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { auto *Stmt = cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); auto *CS = dyn_cast<CompoundStmt>(Stmt); bool HasLastprivates = false; - auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Stmt, CS, &HasLastprivates](CodeGenFunction &CGF, + PrePostActionTy &) { auto &C = CGF.CGM.getContext(); auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. @@ -2112,10 +2115,10 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_section, CodeGen, S.hasCancel()); } @@ -2137,17 +2140,17 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { AssignmentOps.append(C->assignment_ops().begin(), C->assignment_ops().end()); } + // Emit code for 'single' region along with 'copyprivate' clauses + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + OMPPrivateScope SingleScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); + CGF.EmitOMPPrivateClause(S, SingleScope); + (void)SingleScope.Privatize(); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; { OMPLexicalScope Scope(*this, S); - // Emit code for 'single' region along with 'copyprivate' clauses - auto &&CodeGen = [&S](CodeGenFunction &CGF) { - CodeGenFunction::OMPPrivateScope SingleScope(CGF); - (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); - CGF.EmitOMPPrivateClause(S, SingleScope); - (void)SingleScope.Privatize(); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }; CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getLocStart(), CopyprivateVars, DestExprs, SrcExprs, AssignmentOps); @@ -2162,21 +2165,23 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getLocStart()); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; Expr *Hint = nullptr; if (auto *HintClause = S.getSingleClause<OMPHintClause>()) Hint = HintClause->getHint(); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitCriticalRegion(*this, S.getDirectiveName().getAsString(), CodeGen, S.getLocStart(), Hint); @@ -2186,8 +2191,7 @@ void CodeGenFunction::EmitOMPParallelForDirective( const OMPParallelForDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen); @@ -2197,8 +2201,7 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( const OMPParallelForSimdDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'for' directive. - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPWorksharingLoop(S); }; emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen); @@ -2208,14 +2211,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( const OMPParallelSectionsDirective &S) { // Emit directive as a combined directive that consists of two implicit // directives: 'parallel' with 'sections' directive. - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { CGF.EmitSections(S); }; + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitSections(S); + }; emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // Emit outlined function for task construct. - OMPLexicalScope Scope(*this, S); auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); @@ -2265,46 +2268,47 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { } } auto &&CodeGen = [PartId, &S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF) { + CodeGenFunction &CGF, PrePostActionTy &) { // Set proper addresses for generated private copies. auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); - OMPPrivateScope Scope(CGF); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { - auto *CopyFn = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); - auto *PrivatesPtr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); - // Map privates. - llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> - PrivatePtrs; - llvm::SmallVector<llvm::Value *, 16> CallArgs; - CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); - } - for (auto *E : FirstprivateVars) { - auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - Address PrivatePtr = - CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); - PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); - CallArgs.push_back(PrivatePtr.getPointer()); + { + OMPPrivateScope Scope(CGF); + if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + auto *CopyFn = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); + auto *PrivatesPtr = CGF.Builder.CreateLoad( + CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(2))); + // Map privates. + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; + llvm::SmallVector<llvm::Value *, 16> CallArgs; + CallArgs.push_back(PrivatesPtr); + for (auto *E : PrivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + for (auto *E : FirstprivateVars) { + auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + Address PrivatePtr = + CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType())); + PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); + CallArgs.push_back(PrivatePtr.getPointer()); + } + CGF.EmitRuntimeCall(CopyFn, CallArgs); + for (auto &&Pair : PrivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + } } - CGF.EmitRuntimeCall(CopyFn, CallArgs); - for (auto &&Pair : PrivatePtrs) { - Address Replacement(CGF.Builder.CreateLoad(Pair.second), - CGF.getContext().getDeclAlign(Pair.first)); - Scope.addPrivate(Pair.first, [Replacement]() { return Replacement; }); + (void)Scope.Privatize(); + if (*PartId) { + // TODO: emit code for untied tasks. } + CGF.EmitStmt(CS->getCapturedStmt()); } - (void)Scope.Privatize(); - if (*PartId) { - // TODO: emit code for untied tasks. - } - CGF.EmitStmt(CS->getCapturedStmt()); }; auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( S, *I, OMPD_task, CodeGen); @@ -2334,6 +2338,7 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { break; } } + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTaskCall( *this, S.getLocStart(), S, Tied, Final, OutlinedFn, SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, FirstprivateVars, @@ -2355,10 +2360,11 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { void CodeGenFunction::EmitOMPTaskgroupDirective( const OMPTaskgroupDirective &S) { - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTaskgroupRegion(*this, CodeGen, S.getLocStart()); } @@ -2490,10 +2496,10 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) { void CodeGenFunction::EmitOMPDistributeDirective( const OMPDistributeDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitOMPDistributeLoop(S); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen, false); } @@ -2511,9 +2517,9 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (!S.getAssociatedStmt()) return; - OMPLexicalScope Scope(*this, S); auto *C = S.getSingleClause<OMPSIMDClause>(); - auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, + PrePostActionTy &Action) { if (C) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -2521,10 +2527,12 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { auto *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); CGF.EmitNounwindRuntimeCall(OutlinedFn, CapturedVars); } else { + Action.Enter(CGF); CGF.EmitStmt( cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); } }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitOrderedRegion(*this, CodeGen, S.getLocStart(), !C); } @@ -2970,18 +2978,39 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { } } - OMPLexicalScope Scope(*this, S); - auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF) { + auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, + PrePostActionTy &) { CGF.EmitStopPoint(CS); EmitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), S.getV(), S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), S.getLocStart()); }; + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); } +std::pair<llvm::Function * /*OutlinedFn*/, llvm::Constant * /*OutlinedFnID*/> +CodeGenFunction::EmitOMPTargetDirectiveOutlinedFunction( + CodeGenModule &CGM, const OMPTargetDirective &S, StringRef ParentName, + bool IsOffloadEntry) { + llvm::Function *OutlinedFn = nullptr; + llvm::Constant *OutlinedFnID = nullptr; + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope PrivateScope(CGF); + (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); + CGF.EmitOMPPrivateClause(S, PrivateScope); + (void)PrivateScope.Privatize(); + + Action.Enter(CGF); + CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }; + // Emit target region as a standalone region. + CGM.getOpenMPRuntime().emitTargetOutlinedFunction( + S, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); + return std::make_pair(OutlinedFn, OutlinedFnID); +} + void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { - OMPLexicalScope Scope(*this, S); const CapturedStmt &CS = *cast<CapturedStmt>(S.getAssociatedStmt()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; @@ -3027,9 +3056,9 @@ void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { ParentName = CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CurFuncDecl))); - CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID, - IsOffloadEntry); - + std::tie(Fn, FnID) = EmitOMPTargetDirectiveOutlinedFunction( + CGM, S, ParentName, IsOffloadEntry); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitTargetCall(*this, S, Fn, FnID, IfCond, Device, CapturedVars); } @@ -3039,8 +3068,6 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - llvm::SmallVector<llvm::Value *, 16> CapturedVars; - CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime(). emitParallelOrTeamsOutlinedFunction(S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); @@ -3063,14 +3090,16 @@ static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, ThreadLimitVal, S.getLocStart()); } + OMPLexicalScope Scope(CGF, S); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getLocStart(), OutlinedFn, CapturedVars); } void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { - LexicalScope Scope(*this, S.getSourceRange()); // Emit parallel region as a standalone region. - auto &&CodeGen = [&S](CodeGenFunction &CGF) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { OMPPrivateScope PrivateScope(CGF); (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); CGF.EmitOMPPrivateClause(S, PrivateScope); @@ -3112,10 +3141,12 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_target_data, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + *this, OMPD_target_data, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } void CodeGenFunction::EmitOMPTargetEnterDataDirective( @@ -3140,18 +3171,22 @@ void CodeGenFunction::EmitOMPTargetParallelForDirective( void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { // emit the code inside the construct for now - auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop_simd, - [&CS](CodeGenFunction &CGF) { CGF.EmitStmt(CS->getCapturedStmt()); }); + *this, OMPD_taskloop_simd, [&S](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt( + cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); + }); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 85d5f7a4320..c4e5ecd9000 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2366,6 +2366,13 @@ public: void EmitOMPDistributeDirective(const OMPDistributeDirective &S); void EmitOMPDistributeLoop(const OMPDistributeDirective &S); + /// Emit outlined function for the target directive. + static std::pair<llvm::Function * /*OutlinedFn*/, + llvm::Constant * /*OutlinedFnID*/> + EmitOMPTargetDirectiveOutlinedFunction(CodeGenModule &CGM, + const OMPTargetDirective &S, + StringRef ParentName, + bool IsOffloadEntry); /// \brief Emit inner loop of the worksharing/simd construct. /// /// \param S Directive, for which the inner loop must be emitted. |