diff options
author | Alexey Bataev <a.bataev@hotmail.com> | 2019-03-21 19:35:27 +0000 |
---|---|---|
committer | Alexey Bataev <a.bataev@hotmail.com> | 2019-03-21 19:35:27 +0000 |
commit | c56872589f1593eb0e6ccd9ee2c3e1be0c947e08 (patch) | |
tree | ba3b62382592b0398821ef5f5243370f75ff34ac | |
parent | 5988d72243bc03cb3a0b3f141cf21fd54596bd63 (diff) | |
download | bcm5719-llvm-c56872589f1593eb0e6ccd9ee2c3e1be0c947e08.tar.gz bcm5719-llvm-c56872589f1593eb0e6ccd9ee2c3e1be0c947e08.zip |
[OPENMP]Codegen support for allocate directive on global variables.
For global variables, the allocate directive must specify only a
predefined allocator. This allocator must be translated into the
corresponding address space on targets that support multiple address
spaces.
llvm-svn: 356702
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 24 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.h | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 28 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 5 | ||||
-rw-r--r-- | clang/lib/CodeGen/CodeGenModule.cpp | 5 | ||||
-rw-r--r-- | clang/test/OpenMP/nvptx_allocate_codegen.cpp | 71 |
6 files changed, 138 insertions, 0 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 6dbc244bb29..d3736b7244d 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -8928,6 +8928,30 @@ void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas( " Expected target-based directive."); } +bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPConstMemAlloc: + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index 2896a659b98..7b2c0f1b914 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -1598,6 +1598,11 @@ public: /// Perform check on requires decl to ensure that target architecture /// supports unified addressing virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D) const {} + + /// Checks if the variable has associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. 
+ virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS); }; /// Class supports emissionof SIMD-only code. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 59066e8813d..7de16032269 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -4840,6 +4840,34 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultFirstprivateAddressSpace() const { return CGM.getContext().getTargetAddressSpace(LangAS::cuda_constant); } +bool CGOpenMPRuntimeNVPTX::hasAllocateAttributeForGlobalVar(const VarDecl *VD, + LangAS &AS) { + if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>()) + return false; + const auto *A = VD->getAttr<OMPAllocateDeclAttr>(); + switch(A->getAllocatorType()) { + case OMPAllocateDeclAttr::OMPDefaultMemAlloc: + // Not supported, fallback to the default mem space. + case OMPAllocateDeclAttr::OMPThreadMemAlloc: + case OMPAllocateDeclAttr::OMPLargeCapMemAlloc: + case OMPAllocateDeclAttr::OMPCGroupMemAlloc: + case OMPAllocateDeclAttr::OMPHighBWMemAlloc: + case OMPAllocateDeclAttr::OMPLowLatMemAlloc: + AS = LangAS::Default; + return true; + case OMPAllocateDeclAttr::OMPConstMemAlloc: + AS = LangAS::cuda_constant; + return true; + case OMPAllocateDeclAttr::OMPPTeamMemAlloc: + AS = LangAS::cuda_shared; + return true; + case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc: + llvm_unreachable("Expected predefined allocator for the variables with the " + "static storage."); + } + return false; +} + // Get current CudaArch and ignore any unknown values static CudaArch getCudaArch(CodeGenModule &CGM) { if (!CGM.getTarget().hasFeature("ptx")) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index 8a92c500b8f..6709ae322a6 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -389,6 +389,11 @@ public: /// address space by default. 
unsigned getDefaultFirstprivateAddressSpace() const override; + /// Checks if the variable has associated OMPAllocateDeclAttr attribute with + /// the predefined allocator and translates it into the corresponding address + /// space. + bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS) override; + private: /// Track the execution mode when codegening directives within a target /// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 3a9df23a495..b9d4ee9f8c4 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -3387,6 +3387,11 @@ LangAS CodeGenModule::GetGlobalVarAddressSpace(const VarDecl *D) { return LangAS::cuda_device; } + if (LangOpts.OpenMP) { + LangAS AS; + if (OpenMPRuntime->hasAllocateAttributeForGlobalVar(D, AS)) + return AS; + } return getTargetCodeGenInfo().getGlobalVarAddressSpace(*this, D); } diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp new file mode 100644 index 00000000000..e9b9509334e --- /dev/null +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -0,0 +1,71 @@ +// RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc -o %t-host.bc %s +// RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#pragma omp declare target +typedef void **omp_allocator_handle_t; +extern const omp_allocator_handle_t omp_default_mem_alloc; +extern const omp_allocator_handle_t omp_large_cap_mem_alloc; +extern const omp_allocator_handle_t omp_const_mem_alloc; +extern const omp_allocator_handle_t omp_high_bw_mem_alloc; +extern const omp_allocator_handle_t omp_low_lat_mem_alloc; +extern 
const omp_allocator_handle_t omp_cgroup_mem_alloc; +extern const omp_allocator_handle_t omp_pteam_mem_alloc; +extern const omp_allocator_handle_t omp_thread_mem_alloc; + +// CHECK-DAG: @{{.+}}St1{{.+}}b{{.+}} = external global i32, +// CHECK-DAG: @a = global i32 0, +// CHECK-DAG: @b = addrspace(4) global i32 0, +// CHECK-DAG: @c = global i32 0, +// CHECK-DAG: @d = global %struct.St1 zeroinitializer, +// CHECK-DAG: @{{.+}}ns{{.+}}a{{.+}} = addrspace(3) global i32 0, +// CHECK-DAG: @{{.+}}main{{.+}}a{{.*}} = internal global i32 0, +// CHECK-DAG: @{{.+}}ST{{.+}}m{{.+}} = external global i32, +struct St{ + int a; +}; + +struct St1{ + int a; + static int b; +#pragma omp allocate(b) allocator(omp_default_mem_alloc) +} d; + +int a, b, c; +#pragma omp allocate(a) allocator(omp_large_cap_mem_alloc) +#pragma omp allocate(b) allocator(omp_const_mem_alloc) +#pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc) + +template <class T> +struct ST { + static T m; + #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc) +}; + +template <class T> T foo() { + T v; + #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc) + v = ST<T>::m; + return v; +} + +namespace ns{ + int a; +} +#pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc) + +int main () { + static int a; +#pragma omp allocate(a) allocator(omp_thread_mem_alloc) + a=2; + double b = 3; +#pragma omp allocate(b) + return (foo<int>()); +} + +extern template int ST<int>::m; +#pragma omp end declare target +#endif |