diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2015-09-10 08:12:02 +0000 |
commit | 2377fe95c6beeba5cbba356fee7fba32257bbce2 (patch) | |
tree | b853c3ba0847ca5371ae261d956726aa9d99223e /clang/lib/CodeGen/CGStmtOpenMP.cpp | |
parent | f054eca167ea834a168704ff7065a149d75e8f0f (diff) | |
download | bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.tar.gz bcm5719-llvm-2377fe95c6beeba5cbba356fee7fba32257bbce2.zip |
[OPENMP] Outlined function for parallel and other regions with list of captured variables.
Currently all variables used in OpenMP regions are captured into a record and passed to outlined functions in this record. It may result in some poor performance because of too complex analysis later in optimization passes. Patch makes to emit outlined functions for parallel-based regions with a list of captured variables. It reduces code for 2*n GEPs, stores and loads at least.
Codegen for task-based regions remains unchanged because runtime requires that all captured variables are passed in captured record.
llvm-svn: 247251
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 115 |
1 files changed, 113 insertions, 2 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 3ac3dfaf453..d7c411c073f 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -20,6 +20,115 @@ using namespace clang; using namespace CodeGen; +void CodeGenFunction::GenerateOpenMPCapturedVars( + const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { + const RecordDecl *RD = S.getCapturedRecordDecl(); + auto CurField = RD->field_begin(); + auto CurCap = S.captures().begin(); + for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), + E = S.capture_init_end(); + I != E; ++I, ++CurField, ++CurCap) { + if (CurField->hasCapturedVLAType()) { + auto VAT = CurField->getCapturedVLAType(); + CapturedVars.push_back(VLASizeMap[VAT->getSizeExpr()]); + } else if (CurCap->capturesThis()) + CapturedVars.push_back(CXXThisValue); + else + CapturedVars.push_back(EmitLValue(*I).getAddress().getPointer()); + } +} + +llvm::Function * +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { + assert( + CapturedStmtInfo && + "CapturedStmtInfo should be set when generating the captured function"); + const CapturedDecl *CD = S.getCapturedDecl(); + const RecordDecl *RD = S.getCapturedRecordDecl(); + assert(CD->hasBody() && "missing CapturedDecl body"); + + // Build the argument list. + ASTContext &Ctx = CGM.getContext(); + FunctionArgList Args; + Args.append(CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); + auto I = S.captures().begin(); + for (auto *FD : RD->fields()) { + QualType ArgType = FD->getType(); + IdentifierInfo *II = nullptr; + VarDecl *CapVar = nullptr; + if (I->capturesVariable()) { + CapVar = I->getCapturedVar(); + II = CapVar->getIdentifier(); + } else if (I->capturesThis()) + II = &getContext().Idents.get("this"); + else { + assert(I->capturesVariableArrayType()); + II = &getContext().Idents.get("vla"); + } + if (ArgType->isVariablyModifiedType()) + ArgType = getContext().getVariableArrayDecayedType(ArgType); + Args.push_back(ImplicitParamDecl::Create(getContext(), nullptr, + FD->getLocation(), II, ArgType)); + ++I; + } + Args.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); + + // Create the function declaration. + FunctionType::ExtInfo ExtInfo; + const CGFunctionInfo &FuncInfo = + CGM.getTypes().arrangeFreeFunctionDeclaration(Ctx.VoidTy, Args, ExtInfo, + /*IsVariadic=*/false); + llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); + + llvm::Function *F = llvm::Function::Create( + FuncLLVMTy, llvm::GlobalValue::InternalLinkage, + CapturedStmtInfo->getHelperName(), &CGM.getModule()); + CGM.SetInternalFunctionAttributes(CD, F, FuncInfo); + if (CD->isNothrow()) + F->addFnAttr(llvm::Attribute::NoUnwind); + + // Generate the function. + StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CD->getBody()->getLocStart()); + unsigned Cnt = CD->getContextParamPosition(); + I = S.captures().begin(); + for (auto *FD : RD->fields()) { + LValue ArgLVal = + MakeAddrLValue(GetAddrOfLocalVar(Args[Cnt]), Args[Cnt]->getType(), + AlignmentSource::Decl); + if (FD->hasCapturedVLAType()) { + auto *ExprArg = + EmitLoadOfLValue(ArgLVal, SourceLocation()).getScalarVal(); + auto VAT = FD->getCapturedVLAType(); + VLASizeMap[VAT->getSizeExpr()] = ExprArg; + } else if (I->capturesVariable()) { + auto *Var = I->getCapturedVar(); + QualType VarTy = Var->getType(); + Address ArgAddr = ArgLVal.getAddress(); + if (!VarTy->isReferenceType()) { + ArgAddr = EmitLoadOfReference( + ArgAddr, ArgLVal.getType()->castAs<ReferenceType>()); + } + setAddrOfLocalVar(Var, ArgAddr); + } else { + // If 'this' is captured, load it into CXXThisValue. + assert(I->capturesThis()); + CXXThisValue = + EmitLoadOfLValue(ArgLVal, Args[Cnt]->getLocation()).getScalarVal(); + } + ++Cnt, ++I; + } + + PGO.assignRegionCounters(CD, F); + CapturedStmtInfo->EmitBody(*this, CD->getBody()); + FinishFunction(CD->getBodyRBrace()); + + return F; +} + //===----------------------------------------------------------------------===// // OpenMP Directive Emission //===----------------------------------------------------------------------===// @@ -246,6 +355,7 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { DeclRefExpr DRE(const_cast<VarDecl *>(VD), true, (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); MasterAddr = EmitLValue(&DRE).getAddress(); + LocalDeclMap.erase(VD); } else { MasterAddr = Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD) @@ -473,7 +583,8 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = CGF.GenerateCapturedStmtArgument(*CS); + llvm::SmallVector<llvm::Value *, 16> CapturedVars; + CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen); if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { @@ -497,7 +608,7 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF, } } CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn, - CapturedStruct, IfCond); + CapturedVars, IfCond); } void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |