summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp79
1 files changed, 71 insertions, 8 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index 944130b1abf..7ae83773117 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -1972,6 +1972,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
return;
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType SecGlobalRecTy;
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
@@ -1986,11 +1987,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
llvm::PointerType *GlobalRecPtrTy =
CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
llvm::Value *GlobalRecCastAddr;
+ llvm::Value *IsTTD = nullptr;
if (WithSPMDCheck ||
getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit");
llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd");
llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd");
+ if (I->getSecond().SecondaryGlobalRecord.hasValue()) {
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
+ llvm::Value *ThreadID = getThreadID(CGF, Loc);
+ llvm::Value *PL = CGF.EmitRuntimeCall(
+ createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_parallel_level),
+ {RTLoc, ThreadID});
+ IsTTD = Bld.CreateIsNull(PL);
+ }
llvm::Value *IsSPMD = Bld.CreateIsNotNull(CGF.EmitNounwindRuntimeCall(
createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_is_spmd_exec_mode)));
Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB);
@@ -2003,11 +2013,28 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(NonSPMDBB);
+ llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize);
+ if (const RecordDecl *SecGlobalizedVarsRecord =
+ I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) {
+ SecGlobalRecTy =
+ CGM.getContext().getRecordType(SecGlobalizedVarsRecord);
+
+ // Recover pointer to this function's global record. The runtime will
+ // handle the specifics of the allocation of the memory.
+ // Use actual memory size of the record including the padding
+ // for alignment purposes.
+ unsigned Alignment =
+ CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity();
+ unsigned GlobalRecordSize =
+ CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity();
+ GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ Size = Bld.CreateSelect(
+ IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size);
+ }
// TODO: allow the usage of shared memory to be controlled by
// the user, for now, default to global.
llvm::Value *GlobalRecordSizeArg[] = {
- llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize),
- CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
+ Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)};
llvm::Value *GlobalRecValue =
CGF.EmitRuntimeCall(createNVPTXRuntimeFunction(
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
@@ -2042,6 +2069,17 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
+ LValue SecBase;
+ decltype(I->getSecond().LocalVarData)::const_iterator SecIt;
+ if (IsTTD) {
+ SecIt = I->getSecond().SecondaryLocalVarData->begin();
+ llvm::PointerType *SecGlobalRecPtrTy =
+ CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo();
+ SecBase = CGF.MakeNaturalAlignPointeeAddrLValue(
+ Bld.CreatePointerBitCastOrAddrSpaceCast(
+ I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy),
+ SecGlobalRecTy);
+ }
for (auto &Rec : I->getSecond().LocalVarData) {
bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first);
llvm::Value *ParValue;
@@ -2055,23 +2093,32 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
// Emit VarAddr basing on lane-id if required.
QualType VarTy;
if (Rec.second.IsOnePerTeam) {
- Rec.second.PrivateAddr = VarAddr.getAddress();
VarTy = Rec.second.FD->getType();
} else {
llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
VarAddr.getAddress().getPointer(),
{Bld.getInt32(0), getNVPTXLaneID(CGF)});
- Rec.second.PrivateAddr =
- Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));
VarTy =
Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
- VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,
- AlignmentSource::Decl);
+ VarAddr = CGF.MakeAddrLValue(
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy,
+ AlignmentSource::Decl);
}
+ Rec.second.PrivateAddr = VarAddr.getAddress();
if (WithSPMDCheck ||
- getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
assert(I->getSecond().IsInSPMDModeFlag &&
"Expected unknown execution mode or required SPMD check.");
+ if (IsTTD) {
+ assert(SecIt->second.IsOnePerTeam &&
+ "Secondary glob data must be one per team.");
+ LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD);
+ VarAddr.setAddress(
+ Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(),
+ VarAddr.getPointer()),
+ VarAddr.getAlignment()));
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ }
Address GlobalPtr = Rec.second.PrivateAddr;
Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
Rec.second.PrivateAddr = Address(
@@ -2084,6 +2131,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
CGF.EmitStoreOfScalar(ParValue, VarAddr);
I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress());
}
+ ++SecIt;
}
}
for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) {
@@ -4115,6 +4163,21 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
Data.insert(
std::make_pair(VD, MappedVarData(FD, IsInTargetMasterThreadRegion)));
}
+ if (!IsInTargetMasterThreadRegion && !NeedToDelayGlobalization &&
+ !IsInParallelRegion) {
+ CheckVarsEscapingDeclContext VarChecker(CGF);
+ VarChecker.Visit(Body);
+ I->getSecond().SecondaryGlobalRecord =
+ VarChecker.getGlobalizedRecord(/*IsInTargetMasterThreadRegion=*/true);
+ I->getSecond().SecondaryLocalVarData.emplace();
+ DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue();
+ for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
+ assert(VD->isCanonicalDecl() && "Expected canonical declaration");
+ const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
+ Data.insert(std::make_pair(
+ VD, MappedVarData(FD, /*IsInTargetMasterThreadRegion=*/true)));
+ }
+ }
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
struct GlobalizationScope final : EHScopeStack::Cleanup {
OpenPOWER on IntegriCloud