summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp219
1 files changed, 155 insertions, 64 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index b71965cfb53..637a86b4553 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -171,16 +171,23 @@ class CheckVarsEscapingDeclContext final
: public ConstStmtVisitor<CheckVarsEscapingDeclContext> {
CodeGenFunction &CGF;
llvm::SetVector<const ValueDecl *> EscapedDecls;
+ llvm::SmallPtrSet<const Decl *, 4> EscapedParameters;
llvm::SmallPtrSet<const ValueDecl *, 4> IgnoredDecls;
bool AllEscaped = false;
RecordDecl *GlobalizedRD = nullptr;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
void markAsEscaped(const ValueDecl *VD) {
- if (IgnoredDecls.count(VD) ||
- (CGF.CapturedStmtInfo &&
- CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))))
+ if (IgnoredDecls.count(VD))
return;
+ // Variables captured by value must be globalized.
+ if (auto *CSI = CGF.CapturedStmtInfo) {
+ if (const FieldDecl *FD = CGF.CapturedStmtInfo->lookup(cast<VarDecl>(VD))) {
+ if (FD->getType()->isReferenceType())
+ return;
+ EscapedParameters.insert(VD);
+ }
+ }
EscapedDecls.insert(VD);
}
@@ -385,12 +392,15 @@ public:
Visit(Child);
}
+ /// Returns the record that handles all the escaped local variables and used
+ /// instead of their original storage.
const RecordDecl *getGlobalizedRecord() {
if (!GlobalizedRD)
buildRecordForGlobalizedVars();
return GlobalizedRD;
}
+ /// Returns the field in the globalized record for the escaped variable.
const FieldDecl *getFieldForGlobalizedVar(const ValueDecl *VD) const {
assert(GlobalizedRD &&
"Record for globalized variables must be generated already.");
@@ -400,9 +410,16 @@ public:
return I->getSecond();
}
+ /// Returns the list of the escaped local variables/parameters.
ArrayRef<const ValueDecl *> getEscapedDecls() const {
return EscapedDecls.getArrayRef();
}
+
+ /// Checks if the escaped local variable is actually a parameter passed by
+ /// value.
+ const llvm::SmallPtrSetImpl<const Decl *> &getEscapedParameters() const {
+ return EscapedParameters;
+ }
};
} // anonymous namespace
@@ -614,58 +631,14 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_init_stack));
- const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
- if (I == FunctionGlobalizedDecls.end())
- return;
- const RecordDecl *GlobalizedVarsRecord = I->getSecond().first;
- QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
-
- // Recover pointer to this function's global record. The runtime will
- // handle the specifics of the allocation of the memory.
- // Use actual memory size of the record including the padding
- // for alignment purposes.
- unsigned Alignment =
- CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
- unsigned GlobalRecordSize =
- CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
- GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
- // TODO: allow the usage of shared memory to be controlled by
- // the user, for now, default to global.
- llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
- llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
- GlobalRecordSizeArg);
- llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
- FunctionToGlobalRecPtr.try_emplace(CGF.CurFn, GlobalRecValue);
-
- // Emit the "global alloca" which is a GEP from the global declaration record
- // using the pointer returned by the runtime.
- LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
- auto &Res = I->getSecond().second;
- for (auto &Rec : Res) {
- const FieldDecl *FD = Rec.second.first;
- LValue VarAddr = CGF.EmitLValueForField(Base, FD);
- Rec.second.second = VarAddr.getAddress();
- }
+ emitGenericVarsProlog(CGF, WST.Loc);
}
void CGOpenMPRuntimeNVPTX::emitGenericEntryFooter(CodeGenFunction &CGF,
EntryFunctionState &EST) {
- const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
- if (I != FunctionGlobalizedDecls.end()) {
- if (!CGF.HaveInsertPoint())
- return;
- auto I = FunctionToGlobalRecPtr.find(CGF.CurFn);
- if (I != FunctionToGlobalRecPtr.end()) {
- llvm::Value *Args[] = {I->getSecond()};
- CGF.EmitRuntimeCall(
- createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
- Args);
- }
- }
+ emitGenericVarsEpilog(CGF);
+ if (!CGF.HaveInsertPoint())
+ return;
if (!EST.ExitBB)
EST.ExitBB = CGF.createBasicBlock(".exit");
@@ -1207,6 +1180,24 @@ void CGOpenMPRuntimeNVPTX::emitNumTeamsClause(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ SourceLocation Loc = D.getLocStart();
+
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ SourceLocation &Loc;
+
+ public:
+ NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ void Enter(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsProlog(CGF, Loc);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ } Action(Loc);
+ CodeGen.setAction(Action);
auto *OutlinedFun =
cast<llvm::Function>(CGOpenMPRuntime::emitParallelOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen));
@@ -1222,7 +1213,24 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitParallelOutlinedFunction(
llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
+ SourceLocation Loc = D.getLocStart();
+ // Emit target region as a standalone region.
+ class NVPTXPrePostActionTy : public PrePostActionTy {
+ SourceLocation &Loc;
+
+ public:
+ NVPTXPrePostActionTy(SourceLocation &Loc) : Loc(Loc) {}
+ void Enter(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsProlog(CGF, Loc);
+ }
+ void Exit(CodeGenFunction &CGF) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ } Action(Loc);
+ CodeGen.setAction(Action);
llvm::Value *OutlinedFunVal = CGOpenMPRuntime::emitTeamsOutlinedFunction(
D, ThreadIDVar, InnermostKind, CodeGen);
llvm::Function *OutlinedFun = cast<llvm::Function>(OutlinedFunVal);
@@ -1233,6 +1241,73 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
return OutlinedFun;
}
+void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
+ SourceLocation Loc) {
+ CGBuilderTy &Bld = CGF.Builder;
+
+ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I == FunctionGlobalizedDecls.end())
+ return;
+ const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord;
+ QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ // TODO: allow the usage of shared memory to be controlled by
+ // the user, for now, default to global.
+ llvm::Value *GlobalRecordSizeArg[] = {
+ llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
+ CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
+ GlobalRecordSizeArg);
+ llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
+ GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ LValue Base = CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
+ I->getSecond().GlobalRecordAddr = GlobalRecValue;
+
+ // Emit the "global alloca" which is a GEP from the global declaration record
+ // using the pointer returned by the runtime.
+ for (auto &Rec : I->getSecond().LocalVarData) {
+ bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
+ llvm::Value *ParValue;
+ if (EscapedParam) {
+ const auto *VD = cast<VarDecl>(Rec.first);
+ LValue ParLVal =
+ CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
+ ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
+ }
+ const FieldDecl *FD = Rec.second.first;
+ LValue VarAddr = CGF.EmitLValueForField(Base, FD);
+ Rec.second.second = VarAddr.getAddress();
+ if (EscapedParam) {
+ const auto *VD = cast<VarDecl>(Rec.first);
+ CGF.EmitStoreOfScalar(ParValue, VarAddr);
+ I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
+ }
+ }
+ I->getSecond().MappedParams->apply(CGF);
+}
+
+void CGOpenMPRuntimeNVPTX::emitGenericVarsEpilog(CodeGenFunction &CGF) {
+ const auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
+ if (I != FunctionGlobalizedDecls.end() && I->getSecond().GlobalRecordAddr) {
+ I->getSecond().MappedParams->restore(CGF);
+ if (!CGF.HaveInsertPoint())
+ return;
+ CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_data_sharing_pop_stack),
+ I->getSecond().GlobalRecordAddr);
+ }
+}
+
void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
SourceLocation Loc,
@@ -1317,7 +1392,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
llvm::Value *PtrV = Bld.CreateBitCast(V, CGF.VoidPtrTy);
CGF.EmitStoreOfScalar(PtrV, Dst, /*Volatile=*/false,
Ctx.getPointerType(Ctx.VoidPtrTy));
- Idx++;
+ ++Idx;
}
}
@@ -2750,8 +2825,8 @@ void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
continue;
}
llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
- /*AddrSpace=*/0));
+ NativeArg,
+ NativeArg->getType()->getPointerElementType()->getPointerTo());
TargetArgs.emplace_back(
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
}
@@ -2788,7 +2863,7 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
auto *Fn = llvm::Function::Create(
CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
OutlinedParallelFn->getName() + "_wrapper", &CGM.getModule());
- CGM.SetInternalFunctionAttributes(/*D=*/GlobalDecl(), Fn, CGFI);
+ CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
@@ -2854,6 +2929,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
"Function is registered already.");
SmallVector<const ValueDecl *, 4> IgnoredDecls;
const Stmt *Body = nullptr;
+ bool NeedToDelayGlobalization = false;
if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
Body = FD->getBody();
} else if (const auto *BD = dyn_cast<BlockDecl>(D)) {
@@ -2861,6 +2937,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
} else if (const auto *CD = dyn_cast<CapturedDecl>(D)) {
Body = CD->getBody();
if (CGF.CapturedStmtInfo->getKind() == CR_OpenMP) {
+ NeedToDelayGlobalization = true;
if (const auto *CS = dyn_cast<CapturedStmt>(Body)) {
IgnoredDecls.reserve(CS->capture_size());
for (const auto &Capture : CS->captures())
@@ -2876,14 +2953,29 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
const RecordDecl *GlobalizedVarsRecord = VarChecker.getGlobalizedRecord();
if (!GlobalizedVarsRecord)
return;
- auto &Res =
- FunctionGlobalizedDecls
- .try_emplace(CGF.CurFn, GlobalizedVarsRecord, DeclToAddrMapTy())
- .first->getSecond()
- .second;
+ auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first;
+ I->getSecond().MappedParams =
+ llvm::make_unique<CodeGenFunction::OMPMapVars>();
+ I->getSecond().GlobalRecord = GlobalizedVarsRecord;
+ I->getSecond().EscapedParameters.insert(
+ VarChecker.getEscapedParameters().begin(),
+ VarChecker.getEscapedParameters().end());
+ DeclToAddrMapTy &Data = I->getSecond().LocalVarData;
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Res.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ }
+ if (!NeedToDelayGlobalization) {
+ emitGenericVarsProlog(CGF, D->getLocStart());
+ struct GlobalizationScope final : EHScopeStack::Cleanup {
+ GlobalizationScope() = default;
+
+ void Emit(CodeGenFunction &CGF, Flags flags) override {
+ static_cast<CGOpenMPRuntimeNVPTX &>(CGF.CGM.getOpenMPRuntime())
+ .emitGenericVarsEpilog(CGF);
+ }
+ };
+ CGF.EHStack.pushCleanup<GlobalizationScope>(NormalAndEHCleanup);
}
}
@@ -2892,14 +2984,13 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
auto I = FunctionGlobalizedDecls.find(CGF.CurFn);
if (I == FunctionGlobalizedDecls.end())
return Address::invalid();
- auto VDI = I->getSecond().second.find(VD);
- if (VDI == I->getSecond().second.end())
+ auto VDI = I->getSecond().LocalVarData.find(VD);
+ if (VDI == I->getSecond().LocalVarData.end())
return Address::invalid();
return VDI->second.second;
}
void CGOpenMPRuntimeNVPTX::functionFinished(CodeGenFunction &CGF) {
- FunctionToGlobalRecPtr.erase(CGF.CurFn);
FunctionGlobalizedDecls.erase(CGF.CurFn);
CGOpenMPRuntime::functionFinished(CGF);
}
OpenPOWER on IntegriCloud