summaryrefslogtreecommitdiffstats
path: root/clang/lib
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib')
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.cpp90
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntime.h19
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp3
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h2
-rw-r--r--clang/lib/CodeGen/CodeGenModule.cpp4
5 files changed, 113 insertions, 5 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index eb21bbde8d9..0897b4765e2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -457,6 +457,26 @@ enum OpenMPLocationFlags : unsigned {
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
+namespace {
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+/// Values for bit flags for marking which requires clauses have been used.
+enum OpenMPOffloadingRequiresDirFlags : int64_t {
+ /// flag undefined.
+ OMP_REQ_UNDEFINED = 0x000,
+ /// no requires clause present.
+ OMP_REQ_NONE = 0x001,
+ /// reverse_offload clause.
+ OMP_REQ_REVERSE_OFFLOAD = 0x002,
+ /// unified_address clause.
+ OMP_REQ_UNIFIED_ADDRESS = 0x004,
+ /// unified_shared_memory clause.
+ OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
+ /// dynamic_allocators clause.
+ OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
+};
+} // anonymous namespace
+
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
@@ -694,6 +714,8 @@ enum OpenMPRTLFunction {
// *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
OMPRTL__tgt_target_teams_nowait,
+ // Call to void __tgt_register_requires(int64_t flags);
+ OMPRTL__tgt_register_requires,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
@@ -2296,6 +2318,14 @@ llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
break;
}
+ case OMPRTL__tgt_register_requires: {
+ // Build void __tgt_register_requires(int64_t flags);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty};
+ auto *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
+ break;
+ }
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
@@ -6405,6 +6435,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction(
llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
assert(!ParentName.empty() && "Invalid target region parent name!");
+ HasEmittedTargetRegion = true;
emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
IsOffloadEntry, CodeGen);
}
@@ -8225,7 +8256,7 @@ public:
MapValuesArrayTy &Sizes,
MapFlagsArrayTy &Types) const {
// Map other list items in the map clause which are not captured variables
- // but "declare target link" global variables.,
+ // but "declare target link" global variables.
for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
for (const auto &L : C->component_lists()) {
if (!L.first)
@@ -9186,6 +9217,16 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
" Expected target-based directive.");
}
+void CGOpenMPRuntime::checkArchForUnifiedAddressing(
+ const OMPRequiresDecl *D) {
+ for (const OMPClause *Clause : D->clauselists()) {
+ if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+ HasRequiresUnifiedSharedMemory = true;
+ break;
+ }
+ }
+}
+
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
LangAS &AS) {
if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
@@ -9244,6 +9285,47 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
return !AlreadyEmittedTargetFunctions.insert(Name).second;
}
+llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
+ // If we don't have entries or if we are emitting code for the device, we
+ // don't need to do anything.
+ if (CGM.getLangOpts().OMPTargetTriples.empty() ||
+ CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
+ (OffloadEntriesInfoManager.empty() &&
+ !HasEmittedDeclareTargetRegion &&
+ !HasEmittedTargetRegion))
+ return nullptr;
+
+ // Create and register the function that handles the requires directives.
+ ASTContext &C = CGM.getContext();
+
+ llvm::Function *RequiresRegFn;
+ {
+ CodeGenFunction CGF(CGM);
+ const auto &FI = CGM.getTypes().arrangeNullaryFunction();
+ llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
+ std::string ReqName = getName({"omp_offloading", "requires_reg"});
+ RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
+ CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
+ OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
+ // TODO: check for other requires clauses.
+ // The requires directive takes effect only when a target region is
+ // present in the compilation unit. Otherwise it is ignored and not
+ // passed to the runtime. This avoids the runtime from throwing an error
+ // for mismatching requires clauses across compilation units that don't
+ // contain at least 1 target region.
+ assert((HasEmittedTargetRegion ||
+ HasEmittedDeclareTargetRegion ||
+ !OffloadEntriesInfoManager.empty()) &&
+ "Target or declare target region expected.");
+ if (HasRequiresUnifiedSharedMemory)
+ Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__tgt_register_requires),
+ llvm::ConstantInt::get(CGM.Int64Ty, Flags));
+ CGF.FinishFunction();
+ }
+ return RequiresRegFn;
+}
+
llvm::Function *CGOpenMPRuntime::emitRegistrationFunction() {
// If we have offloading in the current module, we need to emit the entries
// now and register the offloading descriptor.
@@ -10284,6 +10366,12 @@ void CGOpenMPRuntime::emitOutlinedFunctionCall(
emitCall(CGF, Loc, OutlinedFn, Args);
}
+void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
+ if (const auto *FD = dyn_cast<FunctionDecl>(D))
+ if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
+ HasEmittedDeclareTargetRegion = true;
+}
+
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
const VarDecl *NativeParam,
const VarDecl *TargetParam) const {
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 42dc4d473b2..1a26620dc70 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -636,6 +636,17 @@ private:
/// must be emitted.
llvm::SmallDenseSet<const VarDecl *> DeferredGlobalVariables;
+ /// Flag for keeping track of weather a requires unified_shared_memory
+ /// directive is present.
+ bool HasRequiresUnifiedSharedMemory = false;
+
+ /// Flag for keeping track of weather a target region has been emitted.
+ bool HasEmittedTargetRegion = false;
+
+ /// Flag for keeping track of weather a device routine has been emitted.
+ /// Device routines are specific to the
+ bool HasEmittedDeclareTargetRegion = false;
+
/// Creates and registers offloading binary descriptor for the current
/// compilation unit. The function that does the registration is returned.
llvm::Function *createOffloadingBinaryDescriptorRegistration();
@@ -1434,6 +1445,10 @@ public:
/// \param GD Global to scan.
virtual bool emitTargetGlobal(GlobalDecl GD);
+ /// Creates and returns a registration function for when at least one
+ /// requires directives was used in the current module.
+ llvm::Function *emitRequiresDirectiveRegFun();
+
/// Creates the offloading descriptor in the event any target region
/// was emitted in the current module and return the function that registers
/// it.
@@ -1581,7 +1596,7 @@ public:
/// Emits OpenMP-specific function prolog.
/// Required for device constructs.
- virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {}
+ virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D);
/// Gets the OpenMP-specific address of the local variable.
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF,
@@ -1602,7 +1617,7 @@ public:
/// Perform check on requires decl to ensure that target architecture
/// supports unified addressing
- virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {}
+ virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D);
/// Checks if the variable has associated OMPAllocateDeclAttr attribute with
/// the predefined allocator and translates it into the corresponding address
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index e6f9d971d7d..5183af1bdbf 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -4891,7 +4891,7 @@ static CudaArch getCudaArch(CodeGenModule &CGM) {
/// Check to see if target architecture supports unified addressing which is
/// a restriction for OpenMP requires clause "unified_shared_memory".
void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
- const OMPRequiresDecl *D) const {
+ const OMPRequiresDecl *D) {
for (const OMPClause *Clause : D->clauselists()) {
if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
switch (getCudaArch(CGM)) {
@@ -4936,6 +4936,7 @@ void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
}
}
}
+ CGOpenMPRuntime::checkArchForUnifiedAddressing(D);
}
/// Get number of SMs and number of blocks per SM.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index 6709ae322a6..e7fd458e727 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -383,7 +383,7 @@ public:
/// Perform check on requires decl to ensure that target architecture
/// supports unified addressing
- void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const override;
+ void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) override;
/// Returns default address space for the constant firstprivates, __constant__
/// address space by default.
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index f062f83425a..a16e7ce47f9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -410,6 +410,10 @@ void CodeGenModule::Release() {
AddGlobalCtor(CudaCtorFunction);
}
if (OpenMPRuntime) {
+ if (llvm::Function *OpenMPRequiresDirectiveRegFun =
+ OpenMPRuntime->emitRequiresDirectiveRegFun()) {
+ AddGlobalCtor(OpenMPRequiresDirectiveRegFun, 0);
+ }
if (llvm::Function *OpenMPRegistrationFunction =
OpenMPRuntime->emitRegistrationFunction()) {
auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ?
OpenPOWER on IntegriCloud