diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
commit | 2377fe95c6beeba5cbba356fee7fba32257bbce2 (patch) | |
tree | b853c3ba0847ca5371ae261d956726aa9d99223e /clang/lib/CodeGen | |
parent | f054eca167ea834a168704ff7065a149d75e8f0f (diff) | |
download | bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.tar.gz bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.zip |
[OPENMP] Outlined function for parallel and other regions with list of captured variables.
Currently all variables used in OpenMP regions are captured into a record and passed to outlined functions in this record. It may result in some poor performance because of too complex analysis later in optimization passes. Patch makes to emit outlined functions for parallel-based regions with a list of captured variables. It reduces code for 2*n GEPs, stores and loads at least.
Codegen for task-based regions remains unchanged because runtime requires that all captured variables are passed in captured record.
llvm-svn: 247251
Diffstat (limited to 'clang/lib/CodeGen')
-rw-r--r-- | clang/lib/CodeGen/CGExpr.cpp | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 112 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 3 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 115 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenFunction.h | 5 |
5 files changed, 178 insertions, 62 deletions
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5e6c4de4087..dd3c69ccf81 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2033,7 +2033,10 @@ LValue CodeGenFunction::EmitDeclRefLValue(const DeclRefExpr *E) { const Expr *Init = VD->getAnyInitializer(VD); if (Init && !isa<ParmVarDecl>(VD) && VD->getType()->isReferenceType() && VD->isUsableInConstantExpressions(getContext()) && - VD->checkInitIsICE()) { + VD->checkInitIsICE() && + // Do not emit if it is private OpenMP variable. + !(E->refersToEnclosingVariableOrCapture() && CapturedStmtInfo && + LocalDeclMap.count(VD))) { llvm::Constant *Val = CGM.EmitConstantValue(*VD->evaluateValue(), VD->getType(), this); assert(Val && "failed to emit reference constant expression"); diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index a0b3ee5ab01..79900778fdf 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -231,14 +231,18 @@ public: } // namespace +static LValue emitLoadOfPointerLValue(CodeGenFunction &CGF, Address PtrAddr, + QualType Ty) { + AlignmentSource Source; + CharUnits Align = CGF.getNaturalPointeeTypeAlignment(Ty, &Source); + return CGF.MakeAddrLValue(Address(CGF.Builder.CreateLoad(PtrAddr), Align), + Ty->getPointeeType(), Source); +} + LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { - return CGF.MakeNaturalAlignAddrLValue( - CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(getThreadIDVariable())), - getThreadIDVariable() - ->getType() - ->castAs<PointerType>() - ->getPointeeType()); + return emitLoadOfPointerLValue(CGF, + CGF.GetAddrOfLocalVar(getThreadIDVariable()), + getThreadIDVariable()->getType()); } void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { @@ -257,9 +261,9 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( CodeGenFunction &CGF) { - return CGF.MakeAddrLValue( - CGF.GetAddrOfLocalVar(getThreadIDVariable()), - getThreadIDVariable()->getType()); + return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), + getThreadIDVariable()->getType(), + AlignmentSource::Decl); } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) @@ -307,7 +311,7 @@ llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction( CodeGenFunction CGF(CGM, true); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - return CGF.GenerateCapturedStmtFunction(*CS); + return CGF.GenerateOpenMPCapturedStmtFunction(*CS); } llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( @@ -1168,25 +1172,25 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, - Address CapturedStruct, + ArrayRef<llvm::Value *> CapturedVars, const Expr *IfCond) { auto *RTLoc = emitUpdateLocation(CGF, Loc); - auto &&ThenGen = - [this, OutlinedFn, CapturedStruct, RTLoc](CodeGenFunction &CGF) { - // Build call __kmpc_fork_call(loc, 1, microtask, - // captured_struct/*context*/) - llvm::Value *Args[] = { - RTLoc, - CGF.Builder.getInt32( - 1), // Number of arguments after 'microtask' argument - // (there is only one additional argument - 'context') - CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), - CGF.EmitCastToVoidPtr(CapturedStruct.getPointer())}; - auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); - CGF.EmitRuntimeCall(RTLFn, Args); - }; - auto &&ElseGen = [this, OutlinedFn, CapturedStruct, RTLoc, Loc]( - CodeGenFunction &CGF) { + auto &&ThenGen = [this, OutlinedFn, CapturedVars, + RTLoc](CodeGenFunction &CGF) { + // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); + llvm::Value *Args[] = { + RTLoc, + CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars + CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())}; + llvm::SmallVector<llvm::Value *, 16> RealArgs; + RealArgs.append(std::begin(Args), std::end(Args)); + RealArgs.append(CapturedVars.begin(), CapturedVars.end()); + + auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call); + CGF.EmitRuntimeCall(RTLFn, RealArgs); + }; + auto &&ElseGen = [this, OutlinedFn, CapturedVars, RTLoc, + Loc](CodeGenFunction &CGF) { auto ThreadID = getThreadID(CGF, Loc); // Build calls: // __kmpc_serialized_parallel(&Loc, GTid); @@ -1200,11 +1204,10 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4), /*Name*/ ".zero.addr"); CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); - llvm::Value *OutlinedFnArgs[] = { - ThreadIDAddr.getPointer(), - ZeroAddr.getPointer(), - CapturedStruct.getPointer() - }; + llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; + OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ZeroAddr.getPointer()); + OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs); // __kmpc_end_serialized_parallel(&Loc, GTid); @@ -1946,7 +1949,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy.withRestrict()); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; @@ -1966,10 +1970,8 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); - auto *TaskTypeArgAddr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(&TaskTypeArg)); - LValue TDBase = - CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); + LValue TDBase = emitLoadOfPointerLValue( + CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); LValue Base = @@ -2014,7 +2016,8 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, - /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy); + /*Id=*/nullptr, + KmpTaskTWithPrivatesPtrQTy.withRestrict()); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); FunctionType::ExtInfo Info; @@ -2031,10 +2034,8 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, Args); - auto *TaskTypeArgAddr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(&TaskTypeArg)); - LValue Base = - CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy); + LValue Base = emitLoadOfPointerLValue( + CGF, CGF.GetAddrOfLocalVar(&TaskTypeArg), KmpTaskTWithPrivatesPtrQTy); auto *KmpTaskTWithPrivatesQTyRD = cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); @@ -2112,21 +2113,17 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, TaskPrivatesMapFnInfo, Args); // *privi = &.privates.privi; - auto *TaskPrivatesArgAddr = CGF.Builder.CreateLoad( - CGF.GetAddrOfLocalVar(&TaskPrivatesArg)); - LValue Base = - CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy); + LValue Base = emitLoadOfPointerLValue( + CGF, CGF.GetAddrOfLocalVar(&TaskPrivatesArg), TaskPrivatesArg.getType()); auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); Counter = 0; for (auto *Field : PrivatesQTyRD->fields()) { auto FieldLVal = CGF.EmitLValueForField(Base, Field); auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc); - CGF.EmitStoreOfScalar( - FieldLVal.getPointer(), - CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(), - RefLVal.getType()->getPointeeType())); + auto RefLoadLVal = + emitLoadOfPointerLValue(CGF, RefLVal.getAddress(), RefLVal.getType()); + CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal); ++Counter; } CGF.FinishFunction(); @@ -2271,10 +2268,13 @@ void CGOpenMPRuntime::emitTaskCall( // Copy shareds if there are any. Address KmpTaskSharedsPtr = Address::invalid(); if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { - KmpTaskSharedsPtr = Address(CGF.EmitLoadOfScalar( - CGF.EmitLValueForField( - TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), - Loc), CGF.getNaturalTypeAlignment(SharedsTy)); + KmpTaskSharedsPtr = + Address(CGF.EmitLoadOfScalar( + CGF.EmitLValueForField( + TDBase, *std::next(KmpTaskTQTyRD->field_begin(), + KmpTaskTShareds)), + Loc), + CGF.getNaturalTypeAlignment(SharedsTy)); CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy); } // Emit initial values for private copies (if any). diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index da6dc108c44..d3c7728c64f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -405,7 +405,8 @@ public: /// virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, - Address CapturedStruct, const Expr *IfCond); + ArrayRef<llvm::Value *> CapturedVars, + const Expr *IfCond); /// \brief Emits a critical region. /// \param CriticalName Name of the critical region. diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 3ac3dfaf453..d7c411c073f 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -20,6 +20,115 @@ using namespace clang; using namespace CodeGen; +void CodeGenFunction::GenerateOpenMPCapturedVars( + const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + auto CurField = RD->field_begin(); + auto CurCap = S.captures().begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField, ++CurCap) { + if (CurField->hasCapturedVLAType()) { + auto VAT = CurField->getCapturedVLAType(); + CapturedVars.push_back(VLASizeMap[VAT->getSizeExpr()]); + } else if (CurCap->capturesThis()) + CapturedVars.push_back(CXXThisValue); + else + CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); + } +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + const RecordDecl *RD = S.getCapturedRecordDecl(); + assert(CD->hasBody() && "missing CapturedDecl body"); + + // Build the argument list. + ASTContext &Ctx = CGM.getContext(); + FunctionArgList Args; + Args.append(CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); + auto I = S.captures().begin(); + for (auto *FD : RD->fields()) { + QualType ArgType = FD->getType(); + IdentifierInfo *II = nullptr; + VarDecl *CapVar = nullptr; + if (I->capturesVariable()) { + CapVar = I->getCapturedVar(); + II = CapVar->getIdentifier(); + } else if (I->capturesThis()) + II = &getContext().Idents.get("this"); + else { + assert(I->capturesVariableArrayType()); + II = &getContext().Idents.get("vla"); + } + if (ArgType->isVariablyModifiedType()) + ArgType = getContext().getVariableArrayDecayedType(ArgType); + Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, + FD->getLocation(), II, ArgType)); + ++I; + } + Args.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); + + // Create the function declaration. + FunctionType::ExtInfo ExtInfo; + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, + /*IsVariadic=*/false); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + llvm::Function *F = llvm::Function::Create( + FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + CapturedStmtInfo->getHelperName(), &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->addFnAttr(llvm::Attribute::NoUnwind); + + // Generate the function. + StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CD->getBody()->getLocStart()); + unsigned Cnt = CD->getContextParamPosition(); + I = S.captures().begin(); + for (auto *FD : RD->fields()) { + LValue ArgLVal = + MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), + AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + auto *ExprArg = + EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); + auto VAT = FD->getCapturedVLAType(); + VLASizeMap[VAT->getSizeExpr()] = ExprArg; + } else if (I->capturesVariable()) { + auto *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(); + if (!VarTy->isReferenceType()) { + ArgAddr = EmitLoadOfReference( + ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + } + setAddrOfLocalVar(Var, ArgAddr); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = + EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); + } + ++Cnt, ++I; + } + + PGO.assignRegionCounters(CD, F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + FinishFunction(CD->getBodyRBrace()); + + return F; +} + //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// @@ -246,6 +355,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); MasterAddr = EmitLValue(&DRE).getAddress(); + LocalDeclMap.erase(VD); } else { MasterAddr = Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) @@ -473,7 +583,8 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { @@ -497,7 +608,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, } } CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, - CapturedStruct, IfCond); + CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 34112727a11..76296dee135 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2204,10 +2204,11 @@ public: LValue InitCapturedStruct(const CapturedStmt &S); llvm::Function *EmitCapturedStmt(const CapturedStmt &S, CapturedRegionKind K); - void GenerateCapturedStmtFunctionProlog(const CapturedStmt &S); - llvm::Function *GenerateCapturedStmtFunctionEpilog(const CapturedStmt &S); llvm::Function *GenerateCapturedStmtFunction(const CapturedStmt &S); Address GenerateCapturedStmtArgument(const CapturedStmt &S); + llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S); + void GenerateOpenMPCapturedVars(const CapturedStmt &S, + SmallVectorImpl<llvm::Value *> &CapturedVars); /// \brief Perform element by element copying of arrays with type \a /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure /// generated by \a CopyGen. |