summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp132
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h16
2 files changed, 109 insertions, 39 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index b75891e4f21..04de3956622 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -169,7 +169,7 @@ enum MachineConfiguration : unsigned {
LaneIDMask = WarpSize - 1,
/// Global memory alignment for performance.
- GlobalMemoryAlignment = 256,
+ GlobalMemoryAlignment = 128,
};
enum NamedBarrier : unsigned {
@@ -186,20 +186,30 @@ static bool stable_sort_comparator(const VarsDataTy P1, const VarsDataTy P2) {
static RecordDecl *buildRecordForGlobalizedVars(
ASTContext &C, ArrayRef<const ValueDecl *> EscapedDecls,
+ ArrayRef<const ValueDecl *> EscapedDeclsForTeams,
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *>
&MappedDeclsFields) {
- if (EscapedDecls.empty())
+ if (EscapedDecls.empty() && EscapedDeclsForTeams.empty())
return nullptr;
SmallVector<VarsDataTy, 4> GlobalizedVars;
for (const ValueDecl *D : EscapedDecls)
+ GlobalizedVars.emplace_back(
+ CharUnits::fromQuantity(std::max(
+ C.getDeclAlign(D).getQuantity(),
+ static_cast<CharUnits::QuantityType>(GlobalMemoryAlignment))),
+ D);
+ for (const ValueDecl *D : EscapedDeclsForTeams)
GlobalizedVars.emplace_back(C.getDeclAlign(D), D);
std::stable_sort(GlobalizedVars.begin(), GlobalizedVars.end(),
stable_sort_comparator);
// Build struct _globalized_locals_ty {
- // /* globalized vars */
+ // /* globalized vars */[32] align (max(decl_align, 128))
+ // /* globalized vars */ for EscapedDeclsForTeams
// };
RecordDecl *GlobalizedRD = C.buildImplicitRecord("_globalized_locals_ty");
GlobalizedRD->startDefinition();
+ llvm::SmallPtrSet<const ValueDecl *, 16> SingleEscaped(
+ EscapedDeclsForTeams.begin(), EscapedDeclsForTeams.end());
for (const auto &Pair : GlobalizedVars) {
const ValueDecl *VD = Pair.second;
QualType Type = VD->getType();
@@ -208,19 +218,39 @@ static RecordDecl *buildRecordForGlobalizedVars(
else
Type = Type.getNonReferenceType();
SourceLocation Loc = VD->getLocation();
- auto *Field =
- FieldDecl::Create(C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
- C.getTrivialTypeSourceInfo(Type, SourceLocation()),
- /*BW=*/nullptr, /*Mutable=*/false,
- /*InitStyle=*/ICIS_NoInit);
- Field->setAccess(AS_public);
- GlobalizedRD->addDecl(Field);
- if (VD->hasAttrs()) {
- for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
- E(VD->getAttrs().end());
- I != E; ++I)
- Field->addAttr(*I);
+ FieldDecl *Field;
+ if (SingleEscaped.count(VD)) {
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ if (VD->hasAttrs()) {
+ for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
+ E(VD->getAttrs().end());
+ I != E; ++I)
+ Field->addAttr(*I);
+ }
+ } else {
+ llvm::APInt ArraySize(32, WarpSize);
+ Type = C.getConstantArrayType(Type, ArraySize, ArrayType::Normal, 0);
+ Field = FieldDecl::Create(
+ C, GlobalizedRD, Loc, Loc, VD->getIdentifier(), Type,
+ C.getTrivialTypeSourceInfo(Type, SourceLocation()),
+ /*BW=*/nullptr, /*Mutable=*/false,
+ /*InitStyle=*/ICIS_NoInit);
+ Field->setAccess(AS_public);
+ llvm::APInt Align(32, std::max(C.getDeclAlign(VD).getQuantity(),
+ static_cast<CharUnits::QuantityType>(
+ GlobalMemoryAlignment)));
+ Field->addAttr(AlignedAttr::CreateImplicit(
+ C, AlignedAttr::GNU_aligned, /*IsAlignmentExpr=*/true,
+ IntegerLiteral::Create(C, Align,
+ C.getIntTypeForBitwidth(32, /*Signed=*/0),
+ SourceLocation())));
}
+ GlobalizedRD->addDecl(Field);
MappedDeclsFields.try_emplace(VD, Field);
}
GlobalizedRD->completeDefinition();
@@ -344,7 +374,8 @@ class CheckVarsEscapingDeclContext final
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
GlobalizedRD = ::buildRecordForGlobalizedVars(
- CGF.getContext(), EscapedDecls.getArrayRef(), MappedDeclsFields);
+ CGF.getContext(), EscapedDecls.getArrayRef(), llvm::None,
+ MappedDeclsFields);
}
public:
@@ -1849,8 +1880,7 @@ getDistributeLastprivateVars(const OMPExecutableDirective &D,
}
if (!Dir)
return;
- for (const OMPLastprivateClause *C :
- Dir->getClausesOfKind<OMPLastprivateClause>()) {
+ for (const auto *C : Dir->getClausesOfKind<OMPLastprivateClause>()) {
for (const Expr *E : C->getVarRefs()) {
const auto *DE = cast<DeclRefExpr>(E->IgnoreParens());
Vars.push_back(cast<ValueDecl>(DE->getDecl()->getCanonicalDecl()));
@@ -1869,8 +1899,8 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
getDistributeLastprivateVars(D, LastPrivates);
if (!LastPrivates.empty())
- GlobalizedRD = buildRecordForGlobalizedVars(
- CGM.getContext(), LastPrivates, MappedDeclsFields);
+ GlobalizedRD = ::buildRecordForGlobalizedVars(
+ CGM.getContext(), llvm::None, LastPrivates, MappedDeclsFields);
}
// Emit target region as a standalone region.
@@ -1899,9 +1929,9 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction(
for (const auto &Pair : MappedDeclsFields) {
assert(Pair.getFirst()->isCanonicalDecl() &&
"Expected canonical declaration");
- Data.insert(std::make_pair(
- Pair.getFirst(),
- std::make_pair(Pair.getSecond(), Address::invalid())));
+ Data.insert(std::make_pair(Pair.getFirst(),
+ MappedVarData(Pair.getSecond(),
+ /*IsOnePerTeam=*/true)));
}
}
Rt.emitGenericVarsProlog(CGF, Loc);
@@ -1935,18 +1965,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
if (I == FunctionGlobalizedDecls.end())
return;
if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) {
- QualType RecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
+ QualType GlobalRecTy = CGM.getContext().getRecordType(GlobalizedVarsRecord);
// Recover pointer to this function's global record. The runtime will
// handle the specifics of the allocation of the memory.
// Use actual memory size of the record including the padding
// for alignment purposes.
unsigned Alignment =
- CGM.getContext().getTypeAlignInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity();
unsigned GlobalRecordSize =
- CGM.getContext().getTypeSizeInChars(RecTy).getQuantity();
+ CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity();
GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment);
+ llvm::PointerType *GlobalRecPtrTy =
+ CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo();
llvm::Value *GlobalRecCastAddr;
if (WithSPMDCheck ||
getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
@@ -1959,7 +1991,8 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
CGF.EmitBlock(SPMDBB);
- Address RecPtr = CGF.CreateMemTemp(RecTy, "_local_stack");
+ Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy),
+ CharUnits::fromQuantity(Alignment));
CGF.EmitBranch(ExitBB);
// There is no need to emit line number for unconditional branch.
(void)ApplyDebugLocation::CreateEmpty(CGF);
@@ -1974,9 +2007,9 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ GlobalRecValue, GlobalRecPtrTy);
CGF.EmitBlock(ExitBB);
- auto *Phi = Bld.CreatePHI(GlobalRecCastAddr->getType(),
+ auto *Phi = Bld.CreatePHI(GlobalRecPtrTy,
/*NumReservedValues=*/2, "_select_stack");
Phi->addIncoming(RecPtr.getPointer(), SPMDBB);
Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB);
@@ -1994,12 +2027,12 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
OMPRTL_NVPTX__kmpc_data_sharing_push_stack),
GlobalRecordSizeArg);
GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast(
- GlobalRecValue, CGF.ConvertTypeForMem(RecTy)->getPointerTo());
+ GlobalRecValue, GlobalRecPtrTy);
I->getSecond().GlobalRecordAddr = GlobalRecValue;
I->getSecond().IsInSPMDModeFlag = nullptr;
}
LValue Base =
- CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, RecTy);
+ CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy);
// Emit the "global alloca" which is a GEP from the global declaration
// record using the pointer returned by the runtime.
@@ -2012,9 +2045,34 @@ void CGOpenMPRuntimeNVPTX::emitGenericVarsProlog(CodeGenFunction &CGF,
CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc);
}
- const FieldDecl *FD = Rec.second.first;
- LValue VarAddr = CGF.EmitLValueForField(Base, FD);
- Rec.second.second = VarAddr.getAddress();
+ LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD);
+ // Emit VarAddr basing on lane-id if required.
+ QualType VarTy;
+ if (Rec.second.IsOnePerTeam) {
+ Rec.second.PrivateAddr = VarAddr.getAddress();
+ VarTy = Rec.second.FD->getType();
+ } else {
+ llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP(
+ VarAddr.getAddress().getPointer(),
+ {Bld.getInt32(0), getNVPTXLaneID(CGF)});
+ Rec.second.PrivateAddr =
+ Address(Ptr, CGM.getContext().getDeclAlign(Rec.first));
+ VarTy =
+ Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType();
+ VarAddr = CGF.MakeAddrLValue(Rec.second.PrivateAddr, VarTy,
+ AlignmentSource::Decl);
+ }
+ if (WithSPMDCheck ||
+ getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_Unknown) {
+ assert(I->getSecond().IsInSPMDModeFlag &&
+ "Expected unknown execution mode or required SPMD check.");
+ Address GlobalPtr = Rec.second.PrivateAddr;
+ Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName());
+ Rec.second.PrivateAddr = Address(
+ Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag,
+ LocalAddr.getPointer(), GlobalPtr.getPointer()),
+ LocalAddr.getAlignment());
+ }
if (EscapedParam) {
const auto *VD = cast<VarDecl>(Rec.first);
CGF.EmitStoreOfScalar(ParValue, VarAddr);
@@ -4047,7 +4105,7 @@ void CGOpenMPRuntimeNVPTX::emitFunctionProlog(CodeGenFunction &CGF,
for (const ValueDecl *VD : VarChecker.getEscapedDecls()) {
assert(VD->isCanonicalDecl() && "Expected canonical declaration");
const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD);
- Data.insert(std::make_pair(VD, std::make_pair(FD, Address::invalid())));
+ Data.insert(std::make_pair(VD, MappedVarData(FD)));
}
if (!NeedToDelayGlobalization) {
emitGenericVarsProlog(CGF, D->getBeginLoc(), /*WithSPMDCheck=*/true);
@@ -4074,7 +4132,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
return Address::invalid();
auto VDI = I->getSecond().LocalVarData.find(VD);
if (VDI != I->getSecond().LocalVarData.end())
- return VDI->second.second;
+ return VDI->second.PrivateAddr;
if (VD->hasAttrs()) {
for (specific_attr_iterator<OMPReferencedVarAttr> IT(VD->attr_begin()),
E(VD->attr_end());
@@ -4083,7 +4141,7 @@ Address CGOpenMPRuntimeNVPTX::getAddressOfLocalVariable(CodeGenFunction &CGF,
cast<VarDecl>(cast<DeclRefExpr>(IT->getRef())->getDecl())
->getCanonicalDecl());
if (VDI != I->getSecond().LocalVarData.end())
- return VDI->second.second;
+ return VDI->second.PrivateAddr;
}
}
return Address::invalid();
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index fc8cd2467bf..cdc856bbcd6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -373,9 +373,21 @@ private:
llvm::Function *createParallelDataSharingWrapper(
llvm::Function *OutlinedParallelFn, const OMPExecutableDirective &D);
+ /// The data for the single globalized variable.
+ struct MappedVarData {
+ /// Corresponding field in the global record.
+ const FieldDecl * FD = nullptr;
+ /// Corresponding address.
+ Address PrivateAddr = Address::invalid();
+ /// true, if only one element is required (for latprivates in SPMD mode),
+ /// false, if need to create based on the warp-size.
+ bool IsOnePerTeam = false;
+ MappedVarData() = delete;
+ MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false)
+ : FD(FD), IsOnePerTeam(IsOnePerTeam) {}
+ };
/// The map of local variables to their addresses in the global memory.
- using DeclToAddrMapTy = llvm::MapVector<const Decl *,
- std::pair<const FieldDecl *, Address>>;
+ using DeclToAddrMapTy = llvm::MapVector<const Decl *, MappedVarData>;
/// Set of the parameters passed by value escaping OpenMP context.
using EscapedParamsTy = llvm::SmallPtrSet<const Decl *, 4>;
struct FunctionData {
OpenPOWER on IntegriCloud